mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 10:23:17 +00:00

Temp directory recursion in ClamAV is when each layer of a scan gets its own temp directory in the parent layer's temp directory. In addition to temp directory recursion, ClamAV has been creating a new subdirectory for each file scan as a risk-averse method to ensure no temporary file leaks fill up the disk. Creating a directory is relatively slow, on Windows in particular, when scanning a lot of very small files. This commit: 1. Separates the temp directory recursion feature from the leave-temps feature so that libclamav can leave temp files without making subdirectories for each file scanned. 2. Makes it so that when temp directory recursion is off, libclamav will just use the configured temp directory for all files. The new option to enable temp directory recursion is libclamav-only at this time. It is off by default, and you can enable it like this: ```c cl_engine_set_num(engine, CL_ENGINE_TMPDIR_RECURSION, 1); ``` For the `clamscan` and `clamd` programs, temp directory recursion will be enabled when `--leave-temps` / `LeaveTemporaryFiles` is enabled. The difference is that when disabled, it will return to using the configured temp directory without making a subdirectory for each file scanned, so as to improve scan performance for small files, mostly on Windows. Under the hood, this commit also: 1. Cleans up how we keep track of tmpdirs for each layer. The goal here is to align how we keep track of layer-specific stuff using the scan_layer structure. 2. Cleans up how we record metadata JSON for embedded files. Note: Embedded files are different from Contained files in that they are extracted not with a parser, but by finding them with file type magic signatures. CLAM-1583
916 lines
28 KiB
C
916 lines
28 KiB
C
/*
|
|
* Match a string against a list of patterns/regexes.
|
|
*
|
|
* Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
|
*
|
|
* Authors: Török Edvin
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include "clamav-config.h"
|
|
#endif
|
|
|
|
#ifdef CL_THREAD_SAFE
|
|
#ifndef _REENTRANT
|
|
#define _REENTRANT
|
|
#endif
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include <zlib.h>
|
|
|
|
#include <limits.h>
|
|
#include <sys/types.h>
|
|
|
|
#include "regex/regex.h"
|
|
|
|
#include "clamav.h"
|
|
#include "others.h"
|
|
#include "regex_list.h"
|
|
#include "matcher-ac.h"
|
|
#include "matcher.h"
|
|
#include "str.h"
|
|
#include "readdb.h"
|
|
#include "jsparse/textbuf.h"
|
|
#include "regex_suffix.h"
|
|
#include "default.h"
|
|
#include "hashtab.h"
|
|
|
|
#include "mpool.h"
|
|
|
|
/* Prototypes */
|
|
static regex_t *new_preg(struct regex_matcher *matcher);
|
|
static size_t reverse_string(char *pattern);
|
|
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
|
|
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern);
|
|
/* ---------- */
|
|
|
|
/* Numeric codes for a match attempt: 0 for success, -1 for failure. */
#define MATCH_SUCCESS 0
#define MATCH_FAILED -1
|
|
|
|
/*
|
|
* Call this function when an unrecoverable error has occurred, (instead of exit).
|
|
*/
|
|
static void fatal_error(struct regex_matcher *matcher)
|
|
{
|
|
regex_list_done(matcher);
|
|
matcher->list_inited = -1; /* the phishing module will know we tried to load an allow list, and failed, so it will disable itself too*/
|
|
}
|
|
|
|
static inline char get_char_at_pos_with_skip(const struct pre_fixup_info *info, const char *buffer, size_t pos)
|
|
{
|
|
const char *str;
|
|
size_t realpos = 0;
|
|
if (!info) {
|
|
return (pos <= strlen(buffer)) ? buffer[pos > 0 ? pos - 1 : 0] : '\0';
|
|
}
|
|
str = info->pre_displayLink.data;
|
|
cli_dbgmsg("calc_pos_with_skip: skip:%llu, %llu - %llu \"%s\",\"%s\"\n", (long long unsigned)pos, (long long unsigned)info->host_start,
|
|
(long long unsigned)info->host_end, str, buffer);
|
|
pos += info->host_start;
|
|
while (str[realpos] && !isalnum(str[realpos])) realpos++;
|
|
for (; str[realpos] && (pos > 0); pos--) {
|
|
while (str[realpos] == ' ') realpos++;
|
|
realpos++;
|
|
}
|
|
while (str[realpos] == ' ') realpos++;
|
|
cli_dbgmsg("calc_pos_with_skip:%s\n", str + realpos);
|
|
return (pos > 0 && !str[realpos]) ? '\0' : str[realpos > 0 ? realpos - 1 : 0];
|
|
}
|
|
|
|
static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
|
|
{
|
|
char c;
|
|
size_t match_len;
|
|
|
|
if (!regex || !regex->pattern)
|
|
return 0;
|
|
match_len = strlen(regex->pattern);
|
|
if (((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len + 1)) == ' ' || c == '\0' || c == '/' || c == '?') &&
|
|
(match_len == buffer_len || /* full match */
|
|
(match_len < buffer_len &&
|
|
((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len - match_len)) == '.' || (c == ' ')))
|
|
/* subdomain matched*/)) {
|
|
/* we have an extra / at the end */
|
|
if (match_len > 0) match_len--;
|
|
cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
|
|
cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
|
|
if (real_len >= match_len + 1) {
|
|
const size_t pos = real_len - match_len - 1;
|
|
if (real_url[pos] != '.') {
|
|
/* we need to shift left, and insert a '.'
|
|
* we have an extra '.' at the beginning inserted by get_host to have room,
|
|
* orig_real_url has to be used here,
|
|
* because we want to overwrite that extra '.' */
|
|
size_t orig_real_len = strlen(orig_real_url);
|
|
cli_dbgmsg("No dot here:%s\n", real_url + pos);
|
|
real_url = orig_real_url;
|
|
memmove(real_url, real_url + 1, orig_real_len - match_len - 1);
|
|
real_url[orig_real_len - match_len - 1] = '.';
|
|
cli_dbgmsg("After inserting .: %s\n", real_url);
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* @matcher - matcher structure to use
|
|
* @real_url - href target
|
|
* @display_url - <a> tag contents
|
|
* @hostOnly - if you want to match only the host part
|
|
* @is_allow_list_lookup - is this a lookup in an allow list?
|
|
*
|
|
* @return - CL_SUCCESS - url doesn't match
|
|
* - CL_VIRUS - url matches list
|
|
*
|
|
* Do not send NULL pointers to this function!!
|
|
*
|
|
*/
|
|
cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const char *display_url, const struct pre_fixup_info *pre_fixup, int hostOnly, const char **info, int is_allow_list_lookup)
|
|
{
|
|
char *orig_real_url = real_url;
|
|
struct regex_list *regex;
|
|
size_t real_len, display_len, buffer_len;
|
|
|
|
char *buffer = NULL;
|
|
char *bufrev = NULL;
|
|
cl_error_t rc = CL_SUCCESS;
|
|
int filter_search_rc = 0;
|
|
int root;
|
|
struct cli_ac_data mdata;
|
|
struct cli_ac_result *res = NULL;
|
|
|
|
if (NULL == matcher) {
|
|
rc = CL_ENULLARG;
|
|
cli_errmsg("regex_list_match: matcher must be initialized\n");
|
|
goto done;
|
|
}
|
|
|
|
if (NULL == real_url) {
|
|
rc = CL_ENULLARG;
|
|
cli_errmsg("regex_list_match: real_url must be initialized\n");
|
|
goto done;
|
|
}
|
|
|
|
if (NULL == display_url) {
|
|
rc = CL_ENULLARG;
|
|
cli_errmsg("regex_list_match: display_url must be initialized\n");
|
|
goto done;
|
|
}
|
|
|
|
*info = NULL;
|
|
if (1 != matcher->list_inited) {
|
|
rc = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
if (0 == matcher->list_built) {
|
|
cli_errmsg("regex_list_match: matcher->list_built must be initialized\n");
|
|
rc = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
|
|
/* skip initial '.' inserted by get_host */
|
|
if (real_url[0] == '.') real_url++;
|
|
if (display_url[0] == '.') display_url++;
|
|
real_len = strlen(real_url);
|
|
display_len = strlen(display_url);
|
|
buffer_len = (hostOnly && !is_allow_list_lookup) ? real_len + 1 : real_len + display_len + 1 + 1;
|
|
if (buffer_len < 3) {
|
|
/* too short, no match possible */
|
|
return CL_SUCCESS;
|
|
}
|
|
buffer = cli_max_malloc(buffer_len + 1);
|
|
if (!buffer) {
|
|
cli_errmsg("regex_list_match: Unable to allocate memory for buffer\n");
|
|
return CL_EMEM;
|
|
}
|
|
|
|
strncpy(buffer, real_url, buffer_len);
|
|
buffer[real_len] = (!is_allow_list_lookup && hostOnly) ? '/' : ':';
|
|
|
|
/*
|
|
* For H-type PDB signatures, real_url is actually the DisplayedHostname.
|
|
* RealHostname is not used.
|
|
*/
|
|
if (!hostOnly || is_allow_list_lookup) {
|
|
/* For all other PDB and WDB signatures concatenate Real:Displayed. */
|
|
strncpy(buffer + real_len + 1, display_url, buffer_len - real_len);
|
|
}
|
|
buffer[buffer_len - 1] = '/';
|
|
buffer[buffer_len] = 0;
|
|
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
|
|
|
|
if (CL_SUCCESS != (rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
|
|
return rc;
|
|
|
|
bufrev = cli_safer_strdup(buffer);
|
|
if (!bufrev)
|
|
return CL_EMEM;
|
|
|
|
reverse_string(bufrev);
|
|
|
|
filter_search_rc = filter_search(&matcher->filter, (const unsigned char *)bufrev, buffer_len);
|
|
if (filter_search_rc == -1) {
|
|
free(buffer);
|
|
free(bufrev);
|
|
/* filter says this suffix doesn't match.
|
|
* The filter has false positives, but no false
|
|
* negatives */
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
rc = cli_ac_scanbuff((const unsigned char *)bufrev, buffer_len, NULL, (void *)®ex, &res, &matcher->suffixes, &mdata, 0, 0, NULL, AC_SCAN_VIR, NULL);
|
|
free(bufrev);
|
|
cli_ac_freedata(&mdata);
|
|
|
|
rc = CL_SUCCESS;
|
|
root = matcher->root_regex_idx;
|
|
while (res || root) {
|
|
struct cli_ac_result *q;
|
|
if (!res) {
|
|
regex = matcher->suffix_regexes[root].head;
|
|
root = 0;
|
|
} else {
|
|
regex = res->customdata;
|
|
}
|
|
while (!rc && regex) {
|
|
/* loop over multiple regexes corresponding to
|
|
* this suffix */
|
|
if (!regex->preg) {
|
|
/* we matched a static pattern */
|
|
rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
|
|
} else {
|
|
rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
|
|
}
|
|
if (rc) *info = regex->pattern;
|
|
regex = regex->nxt;
|
|
}
|
|
if (res) {
|
|
q = res;
|
|
res = res->next;
|
|
free(q);
|
|
}
|
|
}
|
|
free(buffer);
|
|
if (!rc)
|
|
cli_dbgmsg("Lookup result: not in regex list\n");
|
|
else
|
|
cli_dbgmsg("Lookup result: in regex list\n");
|
|
done:
|
|
return rc;
|
|
}
|
|
|
|
/* Initialization & loading */
|
|
/* Initializes @matcher, allocating necessary substructures */
|
|
cl_error_t init_regex_list(struct regex_matcher *matcher, uint8_t dconf_prefiltering)
|
|
{
|
|
#ifdef USE_MPOOL
|
|
mpool_t *mp = NULL;
|
|
#endif
|
|
cl_error_t rc = CL_SUCCESS;
|
|
|
|
if (NULL == matcher) {
|
|
cli_errmsg("init_regex_list: matcher must be initialized\n");
|
|
rc = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
|
|
#ifdef USE_MPOOL
|
|
mp = matcher->mempool;
|
|
if (NULL == mp) {
|
|
cli_errmsg("init_regex_list: matcher->mempool must be initialized\n");
|
|
rc = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
#endif
|
|
|
|
memset(matcher, 0, sizeof(*matcher));
|
|
|
|
matcher->list_inited = 1;
|
|
matcher->list_built = 0;
|
|
matcher->list_loaded = 0;
|
|
cli_hashtab_init(&matcher->suffix_hash, 512);
|
|
#ifdef USE_MPOOL
|
|
matcher->mempool = mp;
|
|
matcher->suffixes.mempool = mp;
|
|
#endif
|
|
|
|
if ((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
|
|
goto done;
|
|
}
|
|
#ifdef USE_MPOOL
|
|
matcher->sha2_256_hashes.mempool = mp;
|
|
matcher->hostkey_prefix.mempool = mp;
|
|
#endif
|
|
if ((rc = cli_bm_init(&matcher->sha2_256_hashes))) {
|
|
goto done;
|
|
}
|
|
if ((rc = cli_bm_init(&matcher->hostkey_prefix))) {
|
|
goto done;
|
|
}
|
|
filter_init(&matcher->filter);
|
|
|
|
done:
|
|
return rc;
|
|
}
|
|
|
|
static int functionality_level_check(char *line)
|
|
{
|
|
char *ptmin;
|
|
char *ptmax;
|
|
size_t j;
|
|
|
|
ptmin = strrchr(line, ':');
|
|
if (!ptmin)
|
|
return CL_SUCCESS;
|
|
|
|
ptmin++;
|
|
|
|
ptmax = strchr(ptmin, '-');
|
|
if (!ptmax)
|
|
return CL_SUCCESS; /* there is no functionality level specified, so we're ok */
|
|
else {
|
|
size_t min, max;
|
|
ptmax++;
|
|
for (j = 0; j + ptmin + 1 < ptmax; j++)
|
|
if (!isdigit(ptmin[j]))
|
|
return CL_SUCCESS; /* not numbers, not functionality level */
|
|
for (j = 0; j < strlen(ptmax); j++)
|
|
if (!isdigit(ptmax[j]))
|
|
return CL_SUCCESS; /* see above */
|
|
ptmax[-1] = '\0';
|
|
min = atoi(ptmin);
|
|
if (strlen(ptmax) == 0)
|
|
max = INT_MAX;
|
|
else
|
|
max = atoi(ptmax);
|
|
|
|
if (min > cl_retflevel()) {
|
|
cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n", line, (unsigned int)min);
|
|
return CL_EMALFDB;
|
|
}
|
|
|
|
if (max < cl_retflevel())
|
|
return CL_EMALFDB;
|
|
ptmin[-1] = '\0';
|
|
return CL_SUCCESS;
|
|
}
|
|
}
|
|
|
|
static int add_hash(struct regex_matcher *matcher, char *pattern, const char fl, int is_prefix)
|
|
{
|
|
int rc = CL_SUCCESS;
|
|
struct cli_bm_patt *pat = NULL;
|
|
struct cli_matcher *bm = NULL;
|
|
const char *vname = NULL;
|
|
|
|
if (0 == strlen(pattern)) {
|
|
cli_errmsg("add_hash: Invalid pattern '%s' in database\n", pattern);
|
|
rc = CL_EMALFDB;
|
|
goto done;
|
|
}
|
|
|
|
pat = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*pat));
|
|
if (!pat) {
|
|
rc = CL_EMEM;
|
|
goto done;
|
|
}
|
|
pat->pattern = (unsigned char *)CLI_MPOOL_HEX2STR(matcher->mempool, pattern);
|
|
if (!pat->pattern) {
|
|
rc = CL_EMALFDB;
|
|
goto done;
|
|
}
|
|
pat->length = 32;
|
|
if (is_prefix) {
|
|
pat->length = 4;
|
|
bm = &matcher->hostkey_prefix;
|
|
} else {
|
|
bm = &matcher->sha2_256_hashes;
|
|
}
|
|
|
|
if (!matcher->sha2_256_pfx_set.keys) {
|
|
if ((rc = cli_hashset_init(&matcher->sha2_256_pfx_set, 1048576, 90))) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
if (fl != 'W' && pat->length == 32 &&
|
|
cli_hashset_contains(&matcher->sha2_256_pfx_set, cli_readint32(pat->pattern)) &&
|
|
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha2_256_hashes, 0, NULL, NULL, NULL) == CL_VIRUS) {
|
|
if (*vname == 'W') {
|
|
/* hash is allowed in local.gdb */
|
|
cli_dbgmsg("Skipping hash %s\n", pattern);
|
|
rc = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
}
|
|
pat->virname = MPOOL_MALLOC(matcher->mempool, 1);
|
|
if (!pat->virname) {
|
|
cli_errmsg("add_hash: Unable to allocate memory for path->virname\n");
|
|
rc = CL_EMEM;
|
|
goto done;
|
|
}
|
|
*pat->virname = fl;
|
|
cli_hashset_addkey(&matcher->sha2_256_pfx_set, cli_readint32(pat->pattern));
|
|
if ((rc = cli_bm_addpatt(bm, pat, "*"))) {
|
|
cli_errmsg("add_hash: failed to add BM pattern\n");
|
|
rc = CL_EMALFDB;
|
|
goto done;
|
|
}
|
|
|
|
pat = NULL;
|
|
done:
|
|
if (pat) {
|
|
if (pat->pattern) {
|
|
MPOOL_FREE(matcher->mempool, pat->pattern);
|
|
}
|
|
if (pat->virname) {
|
|
MPOOL_FREE(matcher->mempool, pat->virname);
|
|
}
|
|
MPOOL_FREE(matcher->mempool, pat);
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/* Load patterns/regexes from file */
|
|
cl_error_t load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher, FILE *fd, unsigned int *signo, unsigned int options, int is_allow_list_lookup, struct cli_dbio *dbio, uint8_t dconf_prefiltering)
|
|
{
|
|
cl_error_t rc;
|
|
int line = 0, entry = 0;
|
|
char buffer[FILEBUFF];
|
|
|
|
if (NULL == matcher) {
|
|
cli_errmsg("load_regex_matcher: matcher must be initialized\n");
|
|
return CL_ENULLARG;
|
|
}
|
|
|
|
if (matcher->list_inited == -1)
|
|
return CL_EMALFDB; /* already failed to load */
|
|
if (!fd && !dbio) {
|
|
cli_errmsg("Unable to load regex list (null file)\n");
|
|
return CL_ENULLARG;
|
|
}
|
|
|
|
cli_dbgmsg("Loading regex_list\n");
|
|
if (!matcher->list_inited) {
|
|
rc = init_regex_list(matcher, dconf_prefiltering);
|
|
if (!matcher->list_inited) {
|
|
cli_errmsg("Regex list failed to initialize!\n");
|
|
fatal_error(matcher);
|
|
return rc;
|
|
}
|
|
}
|
|
/*
|
|
* Regexlist db format, common to .wdb (allow list) and .pdb (domain list) files.
|
|
*
|
|
* Multiple lines of form, (empty lines are skipped):
|
|
* Flags RealURL DisplayedURL
|
|
* Where:
|
|
* Flags:
|
|
*
|
|
* .pdb files:
|
|
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
|
|
* flags that should be filtered.
|
|
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
|
|
*
|
|
* .wdb files:
|
|
* X - full URL regex
|
|
* Y - host-only regex
|
|
* M - host simple pattern
|
|
*
|
|
* If a line in the file doesn't conform to this format, loading fails
|
|
*
|
|
*/
|
|
while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
|
|
char *pattern;
|
|
char *flags;
|
|
size_t pattern_len;
|
|
|
|
cli_chomp(buffer);
|
|
line++;
|
|
if (!*buffer)
|
|
continue; /* skip empty lines */
|
|
|
|
if (buffer[0] == '#')
|
|
continue;
|
|
|
|
if (functionality_level_check(buffer))
|
|
continue;
|
|
|
|
if (engine->cb_sigload && engine->cb_sigload("phishing", buffer, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
|
|
cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
|
|
continue;
|
|
}
|
|
|
|
entry++;
|
|
pattern = strchr(buffer, ':');
|
|
if (!pattern) {
|
|
cli_errmsg("Malformed regex list line %d\n", line);
|
|
fatal_error(matcher);
|
|
return CL_EMALFDB;
|
|
}
|
|
/*pattern[0]='\0';*/
|
|
flags = buffer + 1;
|
|
pattern++;
|
|
|
|
pattern_len = strlen(pattern);
|
|
/* '-3' to leave room for the '/' and null being
|
|
* appended below.
|
|
*/
|
|
if ((pattern - buffer) + pattern_len < (FILEBUFF - 3)) {
|
|
pattern[pattern_len] = '/';
|
|
pattern[pattern_len + 1] = '\0';
|
|
} else {
|
|
cli_errmsg("Overlong regex line %d\n", line);
|
|
fatal_error(matcher);
|
|
return CL_EMALFDB;
|
|
}
|
|
|
|
if ((buffer[0] == 'R' && !is_allow_list_lookup) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_allow_list_lookup)) {
|
|
/* regex for hostname*/
|
|
if ((rc = regex_list_add_pattern(matcher, pattern))) {
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
}
|
|
} else if ((buffer[0] == 'H' && !is_allow_list_lookup) || (buffer[0] == 'M' && is_allow_list_lookup)) {
|
|
/*matches displayed host*/
|
|
if ((rc = add_static_pattern(matcher, pattern)))
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
} else if (buffer[0] == 'S' && (!is_allow_list_lookup || pattern[0] == 'W')) {
|
|
pattern[pattern_len] = '\0';
|
|
if (pattern[0] == 'W')
|
|
flags[0] = 'W';
|
|
if ((pattern[0] == 'W' || pattern[0] == 'F' || pattern[0] == 'P') && pattern[1] == ':') {
|
|
pattern += 2;
|
|
if ((rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P'))) {
|
|
cli_errmsg("Error loading at line: %d\n", line);
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
}
|
|
} else {
|
|
cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
|
|
return CL_EMALFDB;
|
|
}
|
|
} else {
|
|
return CL_EMALFDB;
|
|
}
|
|
}
|
|
matcher->list_loaded = 1;
|
|
if (signo)
|
|
*signo += entry;
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/* Build the matcher list */
|
|
cl_error_t cli_build_regex_list(struct regex_matcher *matcher)
|
|
{
|
|
cl_error_t rc;
|
|
if (!matcher)
|
|
return CL_SUCCESS;
|
|
if (!matcher->list_inited || !matcher->list_loaded) {
|
|
cli_errmsg("Regex list not loaded!\n");
|
|
return -1; /*TODO: better error code */
|
|
}
|
|
cli_dbgmsg("Building regex list\n");
|
|
cli_hashtab_free(&matcher->suffix_hash);
|
|
if ((rc = cli_ac_buildtrie(&matcher->suffixes)))
|
|
return rc;
|
|
matcher->list_built = 1;
|
|
cli_hashset_destroy(&matcher->sha2_256_pfx_set);
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/* Done with this matcher, free resources */
|
|
void regex_list_done(struct regex_matcher *matcher)
|
|
{
|
|
if (NULL == matcher) {
|
|
cli_errmsg("regex_list_done: matcher must be initialized\n");
|
|
goto done;
|
|
}
|
|
|
|
if (matcher->list_inited == 1) {
|
|
size_t i;
|
|
cli_ac_free(&matcher->suffixes);
|
|
if (matcher->suffix_regexes) {
|
|
for (i = 0; i < matcher->suffix_cnt; i++) {
|
|
struct regex_list *r = matcher->suffix_regexes[i].head;
|
|
while (r) {
|
|
struct regex_list *q = r;
|
|
r = r->nxt;
|
|
free(q->pattern);
|
|
free(q);
|
|
}
|
|
}
|
|
free(matcher->suffix_regexes);
|
|
matcher->suffix_regexes = NULL;
|
|
}
|
|
if (matcher->all_pregs) {
|
|
for (i = 0; i < matcher->regex_cnt; i++) {
|
|
regex_t *r = matcher->all_pregs[i];
|
|
cli_regfree(r);
|
|
MPOOL_FREE(matcher->mempool, r);
|
|
}
|
|
MPOOL_FREE(matcher->mempool, matcher->all_pregs);
|
|
}
|
|
cli_hashtab_free(&matcher->suffix_hash);
|
|
cli_bm_free(&matcher->sha2_256_hashes);
|
|
cli_bm_free(&matcher->hostkey_prefix);
|
|
}
|
|
|
|
done:
|
|
return;
|
|
}
|
|
|
|
int is_regex_ok(struct regex_matcher *matcher)
|
|
{
|
|
int ret = 0;
|
|
if (NULL == matcher) {
|
|
cli_errmsg("is_regex_ok: matcher must be initialized\n");
|
|
} else {
|
|
ret = (!matcher->list_inited || matcher->list_inited != -1); /* either we don't have a regexlist, or we initialized it successfully */
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
|
|
{
|
|
struct cli_matcher *root = NULL;
|
|
struct cli_ac_patt *new = NULL;
|
|
size_t i;
|
|
cl_error_t ret = CL_SUCCESS;
|
|
|
|
if (NULL == matcher) {
|
|
cli_errmsg("add_newsuffix: matcher must be initialized\n");
|
|
ret = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
|
|
root = &matcher->suffixes;
|
|
if (NULL == root) {
|
|
cli_errmsg("add_newsuffix: root must be initialized\n");
|
|
ret = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
|
|
if (NULL == suffix) {
|
|
cli_errmsg("add_newsuffix: suffix must be initialized\n");
|
|
ret = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
|
|
new = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*new));
|
|
if (!new) {
|
|
cli_errmsg("add_newsuffix: Unable to allocate memory for new\n");
|
|
ret = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
new->rtype = 0;
|
|
new->type = 0;
|
|
new->sigid = 0;
|
|
new->parts = 0;
|
|
new->partno = 0;
|
|
new->mindist = 0;
|
|
new->maxdist = 0;
|
|
new->offset_min = CLI_OFF_ANY;
|
|
new->length[0] = (uint16_t)len;
|
|
|
|
new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
|
|
if (new->length[0] > root->maxpatlen)
|
|
root->maxpatlen = new->length[0];
|
|
|
|
new->pattern = MPOOL_MALLOC(matcher->mempool, sizeof(new->pattern[0]) * len);
|
|
if (!new->pattern) {
|
|
cli_errmsg("add_newsuffix: Unable to allocate memory for new->pattern\n");
|
|
ret = CL_EMEM;
|
|
goto done;
|
|
}
|
|
for (i = 0; i < len; i++) {
|
|
new->pattern[i] = suffix[i]; /*new->pattern is short int* */
|
|
}
|
|
|
|
new->customdata = info;
|
|
new->virname = NULL;
|
|
if ((ret = cli_ac_addpatt(root, new))) {
|
|
goto done;
|
|
}
|
|
|
|
if (filter_add_static(&matcher->filter, (const unsigned char *)suffix, len, "regex") < 0) {
|
|
cli_errmsg("add_newsuffix: Unable to add filter\n");
|
|
ret = CL_ERROR;
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
if (NULL != new) {
|
|
if (NULL != new->pattern) {
|
|
MPOOL_FREE(matcher->mempool, new->pattern);
|
|
}
|
|
MPOOL_FREE(matcher->mempool, new);
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
#define MODULE "regex_list: "
/* ------ Load a regex, determine its suffix, and maintain the suffix -> regex list map ------ */
|
|
|
|
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
|
|
{
|
|
if (!ht->head)
|
|
ht->head = regex;
|
|
if (ht->tail) {
|
|
ht->tail->nxt = regex;
|
|
}
|
|
ht->tail = regex;
|
|
}
|
|
|
|
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
|
|
{
|
|
struct regex_matcher *matcher = cbdata;
|
|
struct regex_list *regex = NULL;
|
|
const struct cli_element *el = NULL;
|
|
cl_error_t ret = CL_SUCCESS;
|
|
|
|
if (NULL == matcher) {
|
|
cli_errmsg("add_pattern_suffix: matcher must be initialized\n");
|
|
ret = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
if (NULL == suffix) {
|
|
cli_errmsg("add_pattern_suffix: suffix must be initialized\n");
|
|
ret = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
if (NULL == iregex) {
|
|
cli_errmsg("add_pattern_suffix: iregex must be initialized\n");
|
|
ret = CL_ENULLARG;
|
|
goto done;
|
|
}
|
|
|
|
CLI_MALLOC_OR_GOTO_DONE(regex, sizeof(*regex),
|
|
cli_errmsg("add_pattern_suffix: Unable to allocate memory for regex\n");
|
|
ret = CL_EMEM);
|
|
|
|
if (NULL == iregex->pattern) {
|
|
regex->pattern = NULL;
|
|
} else {
|
|
CLI_SAFER_STRDUP_OR_GOTO_DONE(iregex->pattern, regex->pattern,
|
|
cli_errmsg("add_pattern_suffix: unable to strdup iregex->pattern");
|
|
ret = CL_EMEM);
|
|
}
|
|
regex->preg = iregex->preg;
|
|
regex->nxt = NULL;
|
|
el = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
|
|
/* TODO: what if suffixes are prefixes of each other and only one will
|
|
* match? */
|
|
if (el) {
|
|
/* existing suffix */
|
|
if ((size_t)el->data >= matcher->suffix_cnt) {
|
|
cli_errmsg("add_pattern_suffix: el-> data too large");
|
|
ret = CL_ERROR;
|
|
goto done;
|
|
}
|
|
list_add_tail(&matcher->suffix_regexes[(size_t)el->data], regex);
|
|
} else {
|
|
/* new suffix */
|
|
size_t n = matcher->suffix_cnt;
|
|
el = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, (cli_element_data)n);
|
|
CLI_MAX_REALLOC_OR_GOTO_DONE(matcher->suffix_regexes,
|
|
(n + 1) * sizeof(*matcher->suffix_regexes),
|
|
cli_errmsg("add_pattern_suffix: Unable to reallocate memory for matcher->suffix_regexes\n");
|
|
ret = CL_EMEM);
|
|
matcher->suffix_regexes[n].tail = regex;
|
|
matcher->suffix_regexes[n].head = regex;
|
|
if (suffix[0] == '/' && suffix[1] == '\0') {
|
|
matcher->root_regex_idx = n;
|
|
}
|
|
|
|
ret = add_newsuffix(matcher, regex, suffix, suffix_len);
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
cli_hashtab_delete(&matcher->suffix_hash, suffix, suffix_len);
|
|
/*shrink the size back to what it was.*/
|
|
CLI_MAX_REALLOC_OR_GOTO_DONE(matcher->suffix_regexes, n * sizeof(*matcher->suffix_regexes));
|
|
} else {
|
|
matcher->suffix_cnt++;
|
|
}
|
|
}
|
|
|
|
done:
|
|
if (CL_SUCCESS != ret) {
|
|
CLI_FREE_AND_SET_NULL(regex->pattern);
|
|
CLI_FREE_AND_SET_NULL(regex);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Reverse the NUL-terminated string @pattern in place.
 *
 * @return The length of the string.
 */
static size_t reverse_string(char *pattern)
{
    const size_t len = strlen(pattern);

    if (len > 1) {
        char *front = pattern;
        char *back  = pattern + len - 1;

        /* Swap characters from both ends, moving towards the middle. */
        while (front < back) {
            const char tmp = *front;

            *front++ = *back;
            *back--  = tmp;
        }
    }

    return len;
}
|
|
|
|
static regex_t *new_preg(struct regex_matcher *matcher)
|
|
{
|
|
regex_t *r;
|
|
matcher->all_pregs = MPOOL_REALLOC(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
|
|
if (!matcher->all_pregs) {
|
|
cli_errmsg("new_preg: Unable to reallocate memory\n");
|
|
return NULL;
|
|
}
|
|
r = MPOOL_MALLOC(matcher->mempool, sizeof(*r));
|
|
if (!r) {
|
|
cli_errmsg("new_preg: Unable to allocate memory\n");
|
|
return NULL;
|
|
}
|
|
matcher->all_pregs[matcher->regex_cnt - 1] = r;
|
|
return r;
|
|
}
|
|
|
|
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern)
|
|
{
|
|
size_t len;
|
|
struct regex_list regex;
|
|
cl_error_t rc = CL_EMEM;
|
|
|
|
len = reverse_string(pattern);
|
|
regex.nxt = NULL;
|
|
CLI_SAFER_STRDUP_OR_GOTO_DONE(pattern, regex.pattern,
|
|
cli_errmsg("add_static_pattern: Cannot allocate memory for regex.pattern\n");
|
|
rc = CL_EMEM);
|
|
regex.preg = NULL;
|
|
rc = add_pattern_suffix(matcher, pattern, len, ®ex);
|
|
done:
|
|
CLI_FREE_AND_SET_NULL(regex.pattern);
|
|
return rc;
|
|
}
|
|
|
|
cl_error_t regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
|
|
{
|
|
cl_error_t rc;
|
|
regex_t *preg;
|
|
size_t len;
|
|
/* we only match the host, so remove useless stuff */
|
|
const char remove_end[] = "([/?].*)?/";
|
|
const char remove_end2[] = "([/?].*)/";
|
|
|
|
len = strlen(pattern);
|
|
if (len > sizeof(remove_end)) {
|
|
if (strncmp(&pattern[len - sizeof(remove_end) + 1], remove_end, sizeof(remove_end) - 1) == 0) {
|
|
len -= sizeof(remove_end) - 1;
|
|
pattern[len++] = '/';
|
|
}
|
|
}
|
|
if (len > sizeof(remove_end2)) {
|
|
if (strncmp(&pattern[len - sizeof(remove_end2) + 1], remove_end2, sizeof(remove_end2) - 1) == 0) {
|
|
len -= sizeof(remove_end2) - 1;
|
|
pattern[len++] = '/';
|
|
}
|
|
}
|
|
pattern[len] = '\0';
|
|
|
|
preg = new_preg(matcher);
|
|
if (!preg)
|
|
return CL_EMEM;
|
|
|
|
rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void *)matcher);
|
|
if (rc) {
|
|
cli_regfree(preg);
|
|
}
|
|
|
|
return rc;
|
|
}
|