2006-09-12 19:38:39 +00:00
|
|
|
/*
|
|
|
|
* Match a string against a list of patterns/regexes.
|
|
|
|
*
|
2022-01-06 16:53:44 -08:00
|
|
|
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
2019-01-25 10:15:50 -05:00
|
|
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
2008-04-02 15:24:51 +00:00
|
|
|
*
|
|
|
|
* Authors: Török Edvin
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
2008-04-02 15:24:51 +00:00
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
2007-06-30 11:50:56 +00:00
|
|
|
* published by the Free Software Foundation.
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
* MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
#include "clamav-config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CL_THREAD_SAFE
|
|
|
|
#ifndef _REENTRANT
|
|
|
|
#define _REENTRANT
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
2007-12-18 19:23:56 +00:00
|
|
|
#include <zlib.h>
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
#include <limits.h>
|
|
|
|
#include <sys/types.h>
|
2008-07-23 13:51:57 +00:00
|
|
|
#include <assert.h>
|
|
|
|
|
2007-09-17 18:54:56 +00:00
|
|
|
#include "regex/regex.h"
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
#include "clamav.h"
|
|
|
|
#include "others.h"
|
|
|
|
#include "regex_list.h"
|
|
|
|
#include "matcher-ac.h"
|
2008-02-06 20:48:34 +00:00
|
|
|
#include "matcher.h"
|
2007-03-11 11:14:35 +00:00
|
|
|
#include "str.h"
|
2007-12-18 19:23:56 +00:00
|
|
|
#include "readdb.h"
|
2008-07-23 13:51:57 +00:00
|
|
|
#include "jsparse/textbuf.h"
|
2008-07-24 18:48:31 +00:00
|
|
|
#include "regex_suffix.h"
|
2008-12-29 17:55:30 +00:00
|
|
|
#include "default.h"
|
2009-07-17 12:49:04 +03:00
|
|
|
#include "hashtab.h"
|
2008-10-18 00:16:23 +00:00
|
|
|
|
|
|
|
#include "mpool.h"
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
/* Prototypes */
|
2008-07-29 10:36:26 +00:00
|
|
|
static regex_t *new_preg(struct regex_matcher *matcher);
|
2008-07-23 13:51:57 +00:00
|
|
|
static size_t reverse_string(char *pattern);
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
|
|
|
|
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern);
|
2008-07-23 13:51:57 +00:00
|
|
|
/* ---------- */
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2008-10-07 20:04:50 +00:00
|
|
|
/* Result codes for pattern matching helpers. */
#define MATCH_SUCCESS 0
/* Parenthesized so the negative literal cannot merge with surrounding operators. */
#define MATCH_FAILED (-1)
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
/*
|
2016-10-19 15:57:45 -04:00
|
|
|
* Call this function when an unrecoverable error has occurred, (instead of exit).
|
2006-09-12 19:38:39 +00:00
|
|
|
*/
|
2018-12-03 12:40:13 -05:00
|
|
|
static void fatal_error(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
regex_list_done(matcher);
|
2021-05-27 13:15:52 -07:00
|
|
|
matcher->list_inited = -1; /* the phishing module will know we tried to load an allow list, and failed, so it will disable itself too*/
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static inline char get_char_at_pos_with_skip(const struct pre_fixup_info *info, const char *buffer, size_t pos)
|
2007-09-27 21:27:37 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
const char *str;
|
|
|
|
size_t realpos = 0;
|
|
|
|
if (!info) {
|
|
|
|
return (pos <= strlen(buffer)) ? buffer[pos > 0 ? pos - 1 : 0] : '\0';
|
|
|
|
}
|
|
|
|
str = info->pre_displayLink.data;
|
|
|
|
cli_dbgmsg("calc_pos_with_skip: skip:%llu, %llu - %llu \"%s\",\"%s\"\n", (long long unsigned)pos, (long long unsigned)info->host_start,
|
2016-02-22 13:26:15 -05:00
|
|
|
(long long unsigned)info->host_end, str, buffer);
|
2018-12-03 12:40:13 -05:00
|
|
|
pos += info->host_start;
|
|
|
|
while (str[realpos] && !isalnum(str[realpos])) realpos++;
|
|
|
|
for (; str[realpos] && (pos > 0); pos--) {
|
|
|
|
while (str[realpos] == ' ') realpos++;
|
|
|
|
realpos++;
|
|
|
|
}
|
|
|
|
while (str[realpos] == ' ') realpos++;
|
|
|
|
cli_dbgmsg("calc_pos_with_skip:%s\n", str + realpos);
|
|
|
|
return (pos > 0 && !str[realpos]) ? '\0' : str[realpos > 0 ? realpos - 1 : 0];
|
2007-09-27 21:27:37 +00:00
|
|
|
}
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
|
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
char c;
|
|
|
|
size_t match_len;
|
|
|
|
|
|
|
|
if (!regex || !regex->pattern)
|
|
|
|
return 0;
|
|
|
|
match_len = strlen(regex->pattern);
|
|
|
|
if (((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len + 1)) == ' ' || c == '\0' || c == '/' || c == '?') &&
|
|
|
|
(match_len == buffer_len || /* full match */
|
|
|
|
(match_len < buffer_len &&
|
|
|
|
((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len - match_len)) == '.' || (c == ' ')))
|
|
|
|
/* subdomain matched*/)) {
|
|
|
|
/* we have an extra / at the end */
|
|
|
|
if (match_len > 0) match_len--;
|
|
|
|
cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
|
|
|
|
cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
|
|
|
|
if (real_len >= match_len + 1) {
|
|
|
|
const size_t pos = real_len - match_len - 1;
|
|
|
|
if (real_url[pos] != '.') {
|
|
|
|
/* we need to shift left, and insert a '.'
|
2022-02-16 00:13:55 +01:00
|
|
|
* we have an extra '.' at the beginning inserted by get_host to have room,
|
|
|
|
* orig_real_url has to be used here,
|
|
|
|
* because we want to overwrite that extra '.' */
|
2018-12-03 12:40:13 -05:00
|
|
|
size_t orig_real_len = strlen(orig_real_url);
|
|
|
|
cli_dbgmsg("No dot here:%s\n", real_url + pos);
|
|
|
|
real_url = orig_real_url;
|
|
|
|
memmove(real_url, real_url + 1, orig_real_len - match_len - 1);
|
|
|
|
real_url[orig_real_len - match_len - 1] = '.';
|
|
|
|
cli_dbgmsg("After inserting .: %s\n", real_url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
|
|
|
|
return 0;
|
2008-07-23 13:51:57 +00:00
|
|
|
}
|
|
|
|
|
2006-09-12 19:38:39 +00:00
|
|
|
/*
|
|
|
|
* @matcher - matcher structure to use
|
|
|
|
* @real_url - href target
|
|
|
|
* @display_url - <a> tag contents
|
|
|
|
* @hostOnly - if you want to match only the host part
|
2021-05-27 13:15:52 -07:00
|
|
|
* @is_allow_list_lookup - is this a lookup in an allow list?
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* @return - CL_SUCCESS - url doesn't match
|
|
|
|
* - CL_VIRUS - url matches list
|
|
|
|
*
|
|
|
|
* Do not send NULL pointers to this function!!
|
|
|
|
*
|
|
|
|
*/
|
2021-05-27 13:15:52 -07:00
|
|
|
cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const char *display_url, const struct pre_fixup_info *pre_fixup, int hostOnly, const char **info, int is_allow_list_lookup)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
char *orig_real_url = real_url;
|
|
|
|
struct regex_list *regex;
|
|
|
|
size_t real_len, display_len, buffer_len;
|
|
|
|
|
2020-07-24 08:32:47 -07:00
|
|
|
char *buffer = NULL;
|
|
|
|
char *bufrev = NULL;
|
|
|
|
cl_error_t rc = CL_SUCCESS;
|
2022-02-16 00:13:55 +01:00
|
|
|
// int filter_search_rc = 0;
|
2019-02-27 00:47:38 -05:00
|
|
|
int root;
|
|
|
|
struct cli_ac_data mdata;
|
|
|
|
struct cli_ac_result *res = NULL;
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
assert(real_url);
|
|
|
|
assert(display_url);
|
|
|
|
*info = NULL;
|
|
|
|
if (!matcher->list_inited)
|
2019-02-27 00:47:38 -05:00
|
|
|
return CL_SUCCESS;
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher->list_built);
|
|
|
|
/* skip initial '.' inserted by get_host */
|
|
|
|
if (real_url[0] == '.') real_url++;
|
|
|
|
if (display_url[0] == '.') display_url++;
|
|
|
|
real_len = strlen(real_url);
|
|
|
|
display_len = strlen(display_url);
|
2021-05-27 13:15:52 -07:00
|
|
|
buffer_len = (hostOnly && !is_allow_list_lookup) ? real_len + 1 : real_len + display_len + 1 + 1;
|
2018-12-03 12:40:13 -05:00
|
|
|
if (buffer_len < 3) {
|
|
|
|
/* too short, no match possible */
|
2019-02-27 00:47:38 -05:00
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
|
|
|
buffer = cli_malloc(buffer_len + 1);
|
|
|
|
if (!buffer) {
|
|
|
|
cli_errmsg("regex_list_match: Unable to allocate memory for buffer\n");
|
|
|
|
return CL_EMEM;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
strncpy(buffer, real_url, real_len);
|
2021-05-27 13:15:52 -07:00
|
|
|
buffer[real_len] = (!is_allow_list_lookup && hostOnly) ? '/' : ':';
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
/*
|
|
|
|
* For H-type PDB signatures, real_url is actually the DisplayedHostname.
|
|
|
|
* RealHostname is not used.
|
|
|
|
*/
|
2021-05-27 13:15:52 -07:00
|
|
|
if (!hostOnly || is_allow_list_lookup) {
|
2019-02-27 00:47:38 -05:00
|
|
|
/* For all other PDB and WDB signatures concatenate Real:Displayed. */
|
|
|
|
strncpy(buffer + real_len + 1, display_url, display_len);
|
|
|
|
}
|
|
|
|
buffer[buffer_len - 1] = '/';
|
|
|
|
buffer[buffer_len] = 0;
|
|
|
|
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
|
2019-06-30 13:37:27 -07:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
if (CL_SUCCESS != (rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
|
|
|
|
return rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
bufrev = cli_strdup(buffer);
|
|
|
|
if (!bufrev)
|
|
|
|
return CL_EMEM;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
reverse_string(bufrev);
|
Comment out the filter_search call in regex_list_match
Reviewing Coverity bug reports we found that the return value to this
filter_search call was effectively being ignored, causing no filtering
to occur. Fixing this issue resulted in a unit test that uses the
following match list regex to fail when searching for `ebay.com`.:
.+\\.paypal\\.(com|de|fr|it)([/?].*)?:.+\\.ebay\\.(at|be|ca|ch|co\\.uk|de|es|fr|ie|in|it|nl|ph|pl|com(\\.(au|cn|hk|my|sg))?)/
After investigating further, this is because the regex_list_add_pattern
call, which parses the regex for suffixes and attempts to add these to
the filter, can't handle the `com(\\.(au|cn|hk|my|sg))?` portion of
the regex. As a result, it only adds `ebay.at`, `ebay.be`, `ebay.ca`, up
through `ebay.pl` into the filter). With the code returning if no filter match
is found, the `ebay.com` suffix not existing in the filter causes incoming URLs
to be treated as if there are no corresponding regexes for ebay.com, which results
in no regex rules being evaluated against it.
We should get the regex parsing code working (and ensure it handles any
other complex cases in daily.cdb) before re-enabling this code. The code
has had no effect for 12+ years at this point, though, so it's probably
safe to wait a bit longer without it.
2020-06-16 12:26:15 -04:00
|
|
|
// TODO Add this back in once we improve the regex parsing code that finds
|
|
|
|
// suffixes to add to the filter.
|
|
|
|
//
|
|
|
|
// Reviewing Coverity bug reports we found that the return value to this
|
|
|
|
// filter_search call was effectively being ignored, causing no filtering
|
|
|
|
// to occur. Fixing this issue resulted in a unit test that uses the
|
|
|
|
// following match list regex to fail when searching for `ebay.com`.:
|
|
|
|
//
|
|
|
|
// .+\\.paypal\\.(com|de|fr|it)([/?].*)?:.+\\.ebay\\.(at|be|ca|ch|co\\.uk|de|es|fr|ie|in|it|nl|ph|pl|com(\\.(au|cn|hk|my|sg))?)/
|
|
|
|
//
|
|
|
|
// After investigating further, this is because the regex_list_add_pattern
|
|
|
|
// call, which parses the regex for suffixes and attempts to add these to
|
|
|
|
// the filter, can't handle the `com(\\.(au|cn|hk|my|sg))?` portion of
|
|
|
|
// the regex. As a result, it only adds `ebay.at`, `ebay.be`, `ebay.ca`, up
|
|
|
|
// through `ebay.pl` into the filter). With the commented out code below
|
|
|
|
// uncommented, these suffixes not existing in the filter are treated as
|
|
|
|
// there not being a corresponding regex for ebay.com, causing no regex
|
|
|
|
// rules to be evaluated against the URL.
|
|
|
|
//
|
|
|
|
// We should get the regex parsing code working (and ensure it handles any
|
|
|
|
// other complex cases in daily.cdb) before re-enabling this code. The code
|
|
|
|
// has had no effect for 12+ years at this point, though, so it's probably
|
|
|
|
// safe to wait a bit longer without it.
|
|
|
|
//
|
2022-02-16 00:13:55 +01:00
|
|
|
// filter_search_rc = filter_search(&matcher->filter, (const unsigned char *)bufrev, buffer_len);
|
|
|
|
// if (filter_search_rc == -1) {
|
Comment out the filter_search call in regex_list_match
Reviewing Coverity bug reports we found that the return value to this
filter_search call was effectively being ignored, causing no filtering
to occur. Fixing this issue resulted in a unit test that uses the
following match list regex to fail when searching for `ebay.com`.:
.+\\.paypal\\.(com|de|fr|it)([/?].*)?:.+\\.ebay\\.(at|be|ca|ch|co\\.uk|de|es|fr|ie|in|it|nl|ph|pl|com(\\.(au|cn|hk|my|sg))?)/
After investigating further, this is because the regex_list_add_pattern
call, which parses the regex for suffixes and attempts to add these to
the filter, can't handle the `com(\\.(au|cn|hk|my|sg))?` portion of
the regex. As a result, it only adds `ebay.at`, `ebay.be`, `ebay.ca`, up
through `ebay.pl` into the filter). With the code returning if no filter match
is found, the `ebay.com` suffix not existing in the filter causes incoming URLs
to be treated as if there are no corresponding regexes for ebay.com, which results
in no regex rules being evaluated against it.
We should get the regex parsing code working (and ensure it handles any
other complex cases in daily.cdb) before re-enabling this code. The code
has had no effect for 12+ years at this point, though, so it's probably
safe to wait a bit longer without it.
2020-06-16 12:26:15 -04:00
|
|
|
// free(buffer);
|
|
|
|
// free(bufrev);
|
|
|
|
// /* filter says this suffix doesn't match.
|
|
|
|
// * The filter has false positives, but no false
|
|
|
|
// * negatives */
|
|
|
|
// return CL_SUCCESS;
|
|
|
|
//}
|
2019-02-27 00:47:38 -05:00
|
|
|
|
|
|
|
rc = cli_ac_scanbuff((const unsigned char *)bufrev, buffer_len, NULL, (void *)®ex, &res, &matcher->suffixes, &mdata, 0, 0, NULL, AC_SCAN_VIR, NULL);
|
|
|
|
free(bufrev);
|
|
|
|
cli_ac_freedata(&mdata);
|
|
|
|
|
|
|
|
rc = CL_SUCCESS;
|
|
|
|
root = matcher->root_regex_idx;
|
|
|
|
while (res || root) {
|
|
|
|
struct cli_ac_result *q;
|
|
|
|
if (!res) {
|
|
|
|
regex = matcher->suffix_regexes[root].head;
|
|
|
|
root = 0;
|
|
|
|
} else {
|
|
|
|
regex = res->customdata;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
while (!rc && regex) {
|
|
|
|
/* loop over multiple regexes corresponding to
|
2022-02-16 00:13:55 +01:00
|
|
|
* this suffix */
|
2019-02-27 00:47:38 -05:00
|
|
|
if (!regex->preg) {
|
|
|
|
/* we matched a static pattern */
|
|
|
|
rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
|
|
|
|
} else {
|
|
|
|
rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
if (rc) *info = regex->pattern;
|
|
|
|
regex = regex->nxt;
|
|
|
|
}
|
|
|
|
if (res) {
|
|
|
|
q = res;
|
|
|
|
res = res->next;
|
|
|
|
free(q);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
free(buffer);
|
|
|
|
if (!rc)
|
|
|
|
cli_dbgmsg("Lookup result: not in regex list\n");
|
|
|
|
else
|
|
|
|
cli_dbgmsg("Lookup result: in regex list\n");
|
|
|
|
return rc;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialization & loading */
|
2016-10-19 15:57:45 -04:00
|
|
|
/* Initializes @matcher, allocating necessary substructures */
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t init_regex_list(struct regex_matcher *matcher, uint8_t dconf_prefiltering)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2008-10-19 17:57:09 +00:00
|
|
|
#ifdef USE_MPOOL
|
2018-12-03 12:40:13 -05:00
|
|
|
mpool_t *mp = matcher->mempool;
|
2008-10-19 17:57:09 +00:00
|
|
|
#endif
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2006-10-10 23:51:49 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
memset(matcher, 0, sizeof(*matcher));
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->list_inited = 1;
|
|
|
|
matcher->list_built = 0;
|
|
|
|
matcher->list_loaded = 0;
|
|
|
|
cli_hashtab_init(&matcher->suffix_hash, 512);
|
2008-10-19 17:57:09 +00:00
|
|
|
#ifdef USE_MPOOL
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->mempool = mp;
|
|
|
|
matcher->suffixes.mempool = mp;
|
|
|
|
assert(mp && "mempool must be initialized");
|
2008-10-19 17:57:09 +00:00
|
|
|
#endif
|
2018-12-03 12:40:13 -05:00
|
|
|
if ((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
|
|
|
|
return rc;
|
|
|
|
}
|
2008-10-19 17:57:09 +00:00
|
|
|
#ifdef USE_MPOOL
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->sha256_hashes.mempool = mp;
|
|
|
|
matcher->hostkey_prefix.mempool = mp;
|
2008-10-19 17:57:09 +00:00
|
|
|
#endif
|
2018-12-03 12:40:13 -05:00
|
|
|
if ((rc = cli_bm_init(&matcher->sha256_hashes))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if ((rc = cli_bm_init(&matcher->hostkey_prefix))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
filter_init(&matcher->filter);
|
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static int functionality_level_check(char *line)
|
2006-12-02 00:42:44 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
char *ptmin;
|
|
|
|
char *ptmax;
|
|
|
|
size_t j;
|
|
|
|
|
|
|
|
ptmin = strrchr(line, ':');
|
|
|
|
if (!ptmin)
|
|
|
|
return CL_SUCCESS;
|
|
|
|
|
|
|
|
ptmin++;
|
|
|
|
|
|
|
|
ptmax = strchr(ptmin, '-');
|
|
|
|
if (!ptmax)
|
|
|
|
return CL_SUCCESS; /* there is no functionality level specified, so we're ok */
|
|
|
|
else {
|
|
|
|
size_t min, max;
|
|
|
|
ptmax++;
|
|
|
|
for (j = 0; j + ptmin + 1 < ptmax; j++)
|
|
|
|
if (!isdigit(ptmin[j]))
|
|
|
|
return CL_SUCCESS; /* not numbers, not functionality level */
|
|
|
|
for (j = 0; j < strlen(ptmax); j++)
|
|
|
|
if (!isdigit(ptmax[j]))
|
|
|
|
return CL_SUCCESS; /* see above */
|
|
|
|
ptmax[-1] = '\0';
|
|
|
|
min = atoi(ptmin);
|
|
|
|
if (strlen(ptmax) == 0)
|
|
|
|
max = INT_MAX;
|
|
|
|
else
|
|
|
|
max = atoi(ptmax);
|
|
|
|
|
|
|
|
if (min > cl_retflevel()) {
|
|
|
|
cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n", line, (unsigned int)min);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (max < cl_retflevel())
|
|
|
|
return CL_EMALFDB;
|
|
|
|
ptmin[-1] = '\0';
|
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
2008-08-01 14:49:55 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static int add_hash(struct regex_matcher *matcher, char *pattern, const char fl, int is_prefix)
|
2008-08-01 14:49:55 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
int rc;
|
2019-05-03 18:16:03 -04:00
|
|
|
struct cli_bm_patt *pat = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*pat));
|
2018-12-03 12:40:13 -05:00
|
|
|
struct cli_matcher *bm;
|
|
|
|
const char *vname = NULL;
|
|
|
|
if (!pat)
|
|
|
|
return CL_EMEM;
|
2019-05-03 18:16:03 -04:00
|
|
|
pat->pattern = (unsigned char *)CLI_MPOOL_HEX2STR(matcher->mempool, pattern);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!pat->pattern)
|
|
|
|
return CL_EMALFDB;
|
|
|
|
pat->length = 32;
|
|
|
|
if (is_prefix) {
|
|
|
|
pat->length = 4;
|
|
|
|
bm = &matcher->hostkey_prefix;
|
|
|
|
} else {
|
|
|
|
bm = &matcher->sha256_hashes;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!matcher->sha256_pfx_set.keys) {
|
|
|
|
if ((rc = cli_hashset_init(&matcher->sha256_pfx_set, 1048576, 90))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fl != 'W' && pat->length == 32 &&
|
|
|
|
cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
|
|
|
|
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes, 0, NULL, NULL, NULL) == CL_VIRUS) {
|
|
|
|
if (*vname == 'W') {
|
2021-05-27 13:15:52 -07:00
|
|
|
/* hash is allowed in local.gdb */
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("Skipping hash %s\n", pattern);
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, pat->pattern);
|
|
|
|
MPOOL_FREE(matcher->mempool, pat);
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
2019-05-03 18:16:03 -04:00
|
|
|
pat->virname = MPOOL_MALLOC(matcher->mempool, 1);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!pat->virname) {
|
|
|
|
free(pat);
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("add_hash: Unable to allocate memory for path->virname\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_EMEM;
|
|
|
|
}
|
|
|
|
*pat->virname = fl;
|
|
|
|
cli_hashset_addkey(&matcher->sha256_pfx_set, cli_readint32(pat->pattern));
|
|
|
|
if ((rc = cli_bm_addpatt(bm, pat, "*"))) {
|
|
|
|
cli_errmsg("add_hash: failed to add BM pattern\n");
|
|
|
|
free(pat->pattern);
|
|
|
|
free(pat->virname);
|
|
|
|
free(pat);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
return CL_SUCCESS;
|
2006-12-02 00:42:44 +00:00
|
|
|
}
|
|
|
|
|
2006-09-12 19:38:39 +00:00
|
|
|
/* Load patterns/regexes from file */
|
2021-05-27 13:15:52 -07:00
|
|
|
cl_error_t load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher, FILE *fd, unsigned int *signo, unsigned int options, int is_allow_list_lookup, struct cli_dbio *dbio, uint8_t dconf_prefiltering)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
|
|
|
int line = 0, entry = 0;
|
2018-12-03 12:40:13 -05:00
|
|
|
char buffer[FILEBUFF];
|
|
|
|
|
|
|
|
assert(matcher);
|
|
|
|
|
|
|
|
if (matcher->list_inited == -1)
|
|
|
|
return CL_EMALFDB; /* already failed to load */
|
|
|
|
if (!fd && !dbio) {
|
|
|
|
cli_errmsg("Unable to load regex list (null file)\n");
|
|
|
|
return CL_ENULLARG;
|
|
|
|
}
|
|
|
|
|
|
|
|
cli_dbgmsg("Loading regex_list\n");
|
|
|
|
if (!matcher->list_inited) {
|
|
|
|
rc = init_regex_list(matcher, dconf_prefiltering);
|
|
|
|
if (!matcher->list_inited) {
|
|
|
|
cli_errmsg("Regex list failed to initialize!\n");
|
|
|
|
fatal_error(matcher);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
2022-02-16 00:13:55 +01:00
|
|
|
* Regexlist db format, common to .wdb (allow list) and .pdb (domain list) files.
|
2021-05-27 13:15:52 -07:00
|
|
|
*
|
2022-02-16 00:13:55 +01:00
|
|
|
* Multiple lines of form, (empty lines are skipped):
|
|
|
|
* Flags RealURL DisplayedURL
|
|
|
|
* Where:
|
|
|
|
* Flags:
|
|
|
|
*
|
|
|
|
* .pdb files:
|
|
|
|
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
|
|
|
|
* flags that should be filtered.
|
|
|
|
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
|
|
|
|
*
|
|
|
|
* .wdb files:
|
|
|
|
* X - full URL regex
|
|
|
|
* Y - host-only regex
|
|
|
|
* M - host simple pattern
|
|
|
|
*
|
|
|
|
* If a line in the file doesn't conform to this format, loading fails
|
|
|
|
*
|
|
|
|
*/
|
2018-12-03 12:40:13 -05:00
|
|
|
while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
|
|
|
|
char *pattern;
|
|
|
|
char *flags;
|
|
|
|
size_t pattern_len;
|
|
|
|
|
|
|
|
cli_chomp(buffer);
|
|
|
|
line++;
|
|
|
|
if (!*buffer)
|
|
|
|
continue; /* skip empty lines */
|
|
|
|
|
2020-07-15 08:39:32 -07:00
|
|
|
if (buffer[0] == '#')
|
|
|
|
continue;
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
if (functionality_level_check(buffer))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (engine->cb_sigload && engine->cb_sigload("phishing", buffer, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
|
|
|
|
cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
|
|
|
|
continue;
|
|
|
|
}
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
entry++;
|
|
|
|
pattern = strchr(buffer, ':');
|
|
|
|
if (!pattern) {
|
|
|
|
cli_errmsg("Malformed regex list line %d\n", line);
|
|
|
|
fatal_error(matcher);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
/*pattern[0]='\0';*/
|
|
|
|
flags = buffer + 1;
|
|
|
|
pattern++;
|
|
|
|
|
|
|
|
pattern_len = strlen(pattern);
|
2022-02-22 20:46:03 -05:00
|
|
|
/* '-3' to leave room for the '/' and null being
|
|
|
|
* appended below.
|
|
|
|
*/
|
2022-04-01 16:39:57 -04:00
|
|
|
if ((pattern - buffer) + pattern_len < (FILEBUFF - 3)) {
|
2018-12-03 12:40:13 -05:00
|
|
|
pattern[pattern_len] = '/';
|
|
|
|
pattern[pattern_len + 1] = '\0';
|
|
|
|
} else {
|
|
|
|
cli_errmsg("Overlong regex line %d\n", line);
|
|
|
|
fatal_error(matcher);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
|
2021-05-27 13:15:52 -07:00
|
|
|
if ((buffer[0] == 'R' && !is_allow_list_lookup) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_allow_list_lookup)) {
|
2018-12-03 12:40:13 -05:00
|
|
|
/* regex for hostname*/
|
|
|
|
if ((rc = regex_list_add_pattern(matcher, pattern)))
|
|
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
2021-05-27 13:15:52 -07:00
|
|
|
} else if ((buffer[0] == 'H' && !is_allow_list_lookup) || (buffer[0] == 'M' && is_allow_list_lookup)) {
|
2018-12-03 12:40:13 -05:00
|
|
|
/*matches displayed host*/
|
|
|
|
if ((rc = add_static_pattern(matcher, pattern)))
|
|
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
2021-05-27 13:15:52 -07:00
|
|
|
} else if (buffer[0] == 'S' && (!is_allow_list_lookup || pattern[0] == 'W')) {
|
2018-12-03 12:40:13 -05:00
|
|
|
pattern[pattern_len] = '\0';
|
|
|
|
if (pattern[0] == 'W')
|
|
|
|
flags[0] = 'W';
|
|
|
|
if ((pattern[0] == 'W' || pattern[0] == 'F' || pattern[0] == 'P') && pattern[1] == ':') {
|
|
|
|
pattern += 2;
|
|
|
|
if ((rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P'))) {
|
|
|
|
cli_errmsg("Error loading at line: %d\n", line);
|
|
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
matcher->list_loaded = 1;
|
|
|
|
if (signo)
|
|
|
|
*signo += entry;
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
/* Build the matcher list */
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t cli_build_regex_list(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!matcher)
|
|
|
|
return CL_SUCCESS;
|
|
|
|
if (!matcher->list_inited || !matcher->list_loaded) {
|
|
|
|
cli_errmsg("Regex list not loaded!\n");
|
|
|
|
return -1; /*TODO: better error code */
|
|
|
|
}
|
|
|
|
cli_dbgmsg("Building regex list\n");
|
|
|
|
cli_hashtab_free(&matcher->suffix_hash);
|
|
|
|
if ((rc = cli_ac_buildtrie(&matcher->suffixes)))
|
|
|
|
return rc;
|
|
|
|
matcher->list_built = 1;
|
|
|
|
cli_hashset_destroy(&matcher->sha256_pfx_set);
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Done with this matcher, free resources */
|
2018-12-03 12:40:13 -05:00
|
|
|
void regex_list_done(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
|
|
|
|
if (matcher->list_inited == 1) {
|
|
|
|
size_t i;
|
|
|
|
cli_ac_free(&matcher->suffixes);
|
|
|
|
if (matcher->suffix_regexes) {
|
|
|
|
for (i = 0; i < matcher->suffix_cnt; i++) {
|
|
|
|
struct regex_list *r = matcher->suffix_regexes[i].head;
|
|
|
|
while (r) {
|
|
|
|
struct regex_list *q = r;
|
|
|
|
r = r->nxt;
|
|
|
|
free(q->pattern);
|
|
|
|
free(q);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(matcher->suffix_regexes);
|
|
|
|
matcher->suffix_regexes = NULL;
|
|
|
|
}
|
|
|
|
if (matcher->all_pregs) {
|
|
|
|
for (i = 0; i < matcher->regex_cnt; i++) {
|
|
|
|
regex_t *r = matcher->all_pregs[i];
|
|
|
|
cli_regfree(r);
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, r);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, matcher->all_pregs);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
cli_hashtab_free(&matcher->suffix_hash);
|
|
|
|
cli_bm_free(&matcher->sha256_hashes);
|
|
|
|
cli_bm_free(&matcher->hostkey_prefix);
|
|
|
|
}
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
int is_regex_ok(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
return (!matcher->list_inited || matcher->list_inited != -1); /* either we don't have a regexlist, or we initialized it successfully */
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2008-07-24 18:48:31 +00:00
|
|
|
static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct cli_matcher *root = &matcher->suffixes;
|
2019-05-03 18:16:03 -04:00
|
|
|
struct cli_ac_patt *new = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*new));
|
2018-12-03 12:40:13 -05:00
|
|
|
size_t i;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!new)
|
|
|
|
return CL_EMEM;
|
|
|
|
assert(root && suffix);
|
|
|
|
|
|
|
|
new->rtype = 0;
|
|
|
|
new->type = 0;
|
|
|
|
new->sigid = 0;
|
|
|
|
new->parts = 0;
|
|
|
|
new->partno = 0;
|
|
|
|
new->mindist = 0;
|
|
|
|
new->maxdist = 0;
|
|
|
|
new->offset_min = CLI_OFF_ANY;
|
|
|
|
new->length[0] = (uint16_t)len;
|
|
|
|
|
|
|
|
new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
|
|
|
|
if (new->length[0] > root->maxpatlen)
|
|
|
|
root->maxpatlen = new->length[0];
|
|
|
|
|
2019-05-03 18:16:03 -04:00
|
|
|
new->pattern = MPOOL_MALLOC(matcher->mempool, sizeof(new->pattern[0]) * len);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!new->pattern) {
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, new);
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("add_newsuffix: Unable to allocate memory for new->pattern\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_EMEM;
|
|
|
|
}
|
|
|
|
for (i = 0; i < len; i++)
|
|
|
|
new->pattern[i] = suffix[i]; /*new->pattern is short int* */
|
|
|
|
|
|
|
|
new->customdata = info;
|
|
|
|
new->virname = NULL;
|
|
|
|
if ((ret = cli_ac_addpatt(root, new))) {
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, new->pattern);
|
|
|
|
MPOOL_FREE(matcher->mempool, new);
|
2018-12-03 12:40:13 -05:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
filter_add_static(&matcher->filter, (const unsigned char *)suffix, len, "regex");
|
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
#define MODULE "regex_list: "
|
|
|
|
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2008-07-29 15:37:23 +00:00
|
|
|
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
|
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!ht->head)
|
|
|
|
ht->head = regex;
|
|
|
|
if (ht->tail) {
|
|
|
|
ht->tail->nxt = regex;
|
|
|
|
}
|
|
|
|
ht->tail = regex;
|
2008-07-29 15:37:23 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct regex_matcher *matcher = cbdata;
|
|
|
|
struct regex_list *regex = cli_malloc(sizeof(*regex));
|
|
|
|
const struct cli_element *el;
|
|
|
|
void *tmp_matcher; /* save original address if OOM occurs */
|
2008-07-23 13:51:57 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
if (!regex) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("add_pattern_suffix: Unable to allocate memory for regex\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_EMEM;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
|
|
|
|
regex->preg = iregex->preg;
|
|
|
|
regex->nxt = NULL;
|
|
|
|
el = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
|
|
|
|
/* TODO: what if suffixes are prefixes of eachother and only one will
|
2022-02-16 00:13:55 +01:00
|
|
|
* match? */
|
2018-12-03 12:40:13 -05:00
|
|
|
if (el) {
|
|
|
|
/* existing suffix */
|
|
|
|
assert((size_t)el->data < matcher->suffix_cnt);
|
CMake: Add CTest support to match Autotools checks
An ENABLE_TESTS CMake option is provided so that users can disable
testing if they don't want it. Instructions for how to use this
included in the INSTALL.cmake.md file.
If you run `ctest`, each testcase will write out a log file to the
<build>/unit_tests directory.
As with Autotools' make check, the test files are from test/.split
and unit_tests/.split files, but for CMake these are generated at
build time instead of at test time.
On Posix systems, sets the LD_LIBRARY_PATH so that ClamAV-compiled
libraries can be loaded when running tests.
On Windows systems, CTest will identify and collect all library
dependencies and assemble a temporarily install under the
build/unit_tests directory so that the libraries can be loaded when
running tests.
The same feature is used on Windows when using CMake to install to
collect all DLL dependencies so that users don't have to install them
manually afterwards.
Each of the CTest tests are run using a custom wrapper around Python's
unittest framework, which is also responsible for finding and inserting
valgrind into the valgrind tests on Posix systems.
Unlike with Autotools, the CMake CTest Valgrind-tests are enabled by
default, if Valgrind can be found. There's no need to set VG=1.
CTest's memcheck module is NOT supported, because we use Python to
orchestrate our tests.
Added a bunch of Windows compatibility changes to the unit tests.
These were primarily changing / to PATHSEP and making adjustments
to use Win32 C headers and ifdef out the POSIX ones which aren't
available on Windows. Also disabled a bunch of tests on Win32
that don't work on Windows, notably the mmap ones and FD-passing
(i.e. FILEDES) ones.
Add JSON_C_HAVE_INTTYPES_H definition to clamav-config.h to eliminate
warnings on Windows where json.h is included after inttypes.h because
json-c's inttypes replacement relies on it.
This is a it of a hack and may be removed if json-c fixes their
inttypes header stuff in the future.
Add preprocessor definitions on Windows to disable MSVC warnings about
CRT secure and nonstandard functions. While there may be a better
solution, this is needed to be able to see other more serious warnings.
Add missing file comment block and copyright statement for clamsubmit.c.
Also change json-c/json.h include filename to json.h in clamsubmit.c.
The directory name is not required.
Changed the hash table data integer type from long, which is poorly
defined, to size_t -- which is capable of storing a pointer. Fixed a
bunch of casts regarding this variable to eliminate warnings.
Fixed two bugs causing utf8 encoding unit tests to fail on Windows:
- The in_size variable should be the number of bytes, not the character
count. This was was causing the SHIFT_JIS (japanese codepage) to UTF8
transcoding test to only transcode half the bytes.
- It turns out that the MultiByteToWideChar() API can't transcode
UTF16-BE to UTF16-LE. The solution is to just iterate over the buffer
and flip the bytes on each uint16_t. This but was causing the UTF16-BE
to UTF8 tests to fail.
I also split up the utf8 transcoding tests into separate tests so I
could see all of the failures instead of just the first one.
Added a flags parameter to the unit test function to open testfiles
because it turns out that on Windows if a file contains the \r\n it will
replace it with just \n if you opened the file as a text file instead of
as binary. However, if we open the CBC files as binary, then a bunch of
bytecode tests fail. So I've changed the tests to open the CBC files in
the bytecode tests as text files and open all other files as binary.
Ported the feature tests from shell scripts to Python using a modified
version of our QA test-framework, which is largely compatible and will
allow us to migrate some QA tests into this repo. I'd like to add GitHub
Actions pipelines in the future so that all public PR's get some testing
before anyone has to manually review them.
The clamd --log option was missing from the help string, though it
definitely works. I've added it in this commit.
It appears that clamd.c was never clang-format'd, so this commit also
reformats clamd.c.
Some of the check_clamd tests expected the path returned by clamd to
match character for character with original path sent to clamd. However,
as we now evaluate real paths before a scan, the path returned by clamd
isn't going to match the relative (and possibly symlink-ridden) path
passed to clamdscan. I fixed this test by changing the test to search
for the basename: <signature> FOUND within the response instead of
matching the exact path.
Autotools: Link check_clamd with libclamav so we can use our utility
functions in check_clamd.c.
2020-08-25 23:14:23 -07:00
|
|
|
list_add_tail(&matcher->suffix_regexes[(size_t)el->data], regex);
|
2018-12-03 12:40:13 -05:00
|
|
|
} else {
|
|
|
|
/* new suffix */
|
2022-03-26 13:10:26 -04:00
|
|
|
size_t n = matcher->suffix_cnt;
|
CMake: Add CTest support to match Autotools checks
An ENABLE_TESTS CMake option is provided so that users can disable
testing if they don't want it. Instructions for how to use this
included in the INSTALL.cmake.md file.
If you run `ctest`, each testcase will write out a log file to the
<build>/unit_tests directory.
As with Autotools' make check, the test files are from test/.split
and unit_tests/.split files, but for CMake these are generated at
build time instead of at test time.
On Posix systems, sets the LD_LIBRARY_PATH so that ClamAV-compiled
libraries can be loaded when running tests.
On Windows systems, CTest will identify and collect all library
dependencies and assemble a temporarily install under the
build/unit_tests directory so that the libraries can be loaded when
running tests.
The same feature is used on Windows when using CMake to install to
collect all DLL dependencies so that users don't have to install them
manually afterwards.
Each of the CTest tests are run using a custom wrapper around Python's
unittest framework, which is also responsible for finding and inserting
valgrind into the valgrind tests on Posix systems.
Unlike with Autotools, the CMake CTest Valgrind-tests are enabled by
default, if Valgrind can be found. There's no need to set VG=1.
CTest's memcheck module is NOT supported, because we use Python to
orchestrate our tests.
Added a bunch of Windows compatibility changes to the unit tests.
These were primarily changing / to PATHSEP and making adjustments
to use Win32 C headers and ifdef out the POSIX ones which aren't
available on Windows. Also disabled a bunch of tests on Win32
that don't work on Windows, notably the mmap ones and FD-passing
(i.e. FILEDES) ones.
Add JSON_C_HAVE_INTTYPES_H definition to clamav-config.h to eliminate
warnings on Windows where json.h is included after inttypes.h because
json-c's inttypes replacement relies on it.
This is a it of a hack and may be removed if json-c fixes their
inttypes header stuff in the future.
Add preprocessor definitions on Windows to disable MSVC warnings about
CRT secure and nonstandard functions. While there may be a better
solution, this is needed to be able to see other more serious warnings.
Add missing file comment block and copyright statement for clamsubmit.c.
Also change json-c/json.h include filename to json.h in clamsubmit.c.
The directory name is not required.
Changed the hash table data integer type from long, which is poorly
defined, to size_t -- which is capable of storing a pointer. Fixed a
bunch of casts regarding this variable to eliminate warnings.
Fixed two bugs causing utf8 encoding unit tests to fail on Windows:
- The in_size variable should be the number of bytes, not the character
count. This was was causing the SHIFT_JIS (japanese codepage) to UTF8
transcoding test to only transcode half the bytes.
- It turns out that the MultiByteToWideChar() API can't transcode
UTF16-BE to UTF16-LE. The solution is to just iterate over the buffer
and flip the bytes on each uint16_t. This but was causing the UTF16-BE
to UTF8 tests to fail.
I also split up the utf8 transcoding tests into separate tests so I
could see all of the failures instead of just the first one.
Added a flags parameter to the unit test function to open testfiles
because it turns out that on Windows if a file contains the \r\n it will
replace it with just \n if you opened the file as a text file instead of
as binary. However, if we open the CBC files as binary, then a bunch of
bytecode tests fail. So I've changed the tests to open the CBC files in
the bytecode tests as text files and open all other files as binary.
Ported the feature tests from shell scripts to Python using a modified
version of our QA test-framework, which is largely compatible and will
allow us to migrate some QA tests into this repo. I'd like to add GitHub
Actions pipelines in the future so that all public PR's get some testing
before anyone has to manually review them.
The clamd --log option was missing from the help string, though it
definitely works. I've added it in this commit.
It appears that clamd.c was never clang-format'd, so this commit also
reformats clamd.c.
Some of the check_clamd tests expected the path returned by clamd to
match character for character with original path sent to clamd. However,
as we now evaluate real paths before a scan, the path returned by clamd
isn't going to match the relative (and possibly symlink-ridden) path
passed to clamdscan. I fixed this test by changing the test to search
for the basename: <signature> FOUND within the response instead of
matching the exact path.
Autotools: Link check_clamd with libclamav so we can use our utility
functions in check_clamd.c.
2020-08-25 23:14:23 -07:00
|
|
|
el = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, (cli_element_data)n);
|
2018-12-03 12:40:13 -05:00
|
|
|
tmp_matcher = matcher->suffix_regexes; /* save the current value before cli_realloc() */
|
|
|
|
tmp_matcher = cli_realloc(matcher->suffix_regexes, (n + 1) * sizeof(*matcher->suffix_regexes));
|
|
|
|
if (!tmp_matcher) {
|
2022-03-26 13:10:26 -04:00
|
|
|
FREE(regex->pattern);
|
2018-12-03 12:40:13 -05:00
|
|
|
free(regex);
|
|
|
|
return CL_EMEM;
|
|
|
|
}
|
|
|
|
matcher->suffix_regexes = tmp_matcher; /* success, point at new memory location */
|
|
|
|
matcher->suffix_regexes[n].tail = regex;
|
|
|
|
matcher->suffix_regexes[n].head = regex;
|
2022-03-26 13:10:26 -04:00
|
|
|
matcher->suffix_cnt++;
|
2018-12-03 12:40:13 -05:00
|
|
|
if (suffix[0] == '/' && suffix[1] == '\0')
|
|
|
|
matcher->root_regex_idx = n;
|
|
|
|
add_newsuffix(matcher, regex, suffix, suffix_len);
|
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
/*
 * Reverse the NUL-terminated string `pattern` in place.
 *
 * Characters are swapped pairwise from both ends towards the middle; the
 * terminator stays where it is.
 *
 * @param pattern  Writable NUL-terminated string.
 * @return The length of the string (excluding the terminator).
 */
static size_t reverse_string(char *pattern)
{
    const size_t len = strlen(pattern);
    size_t lo        = 0;
    size_t hi        = len; /* one past the right-hand character to swap */

    while (lo + 1 < hi) {
        char held;

        hi--;
        held        = pattern[lo];
        pattern[lo] = pattern[hi];
        pattern[hi] = held;
        lo++;
    }

    return len;
}
|
|
|
|
|
2008-07-29 10:36:26 +00:00
|
|
|
static regex_t *new_preg(struct regex_matcher *matcher)
|
2008-07-25 20:01:40 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
regex_t *r;
|
2019-05-03 18:16:03 -04:00
|
|
|
matcher->all_pregs = MPOOL_REALLOC(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!matcher->all_pregs) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("new_preg: Unable to reallocate memory\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2019-05-03 18:16:03 -04:00
|
|
|
r = MPOOL_MALLOC(matcher->mempool, sizeof(*r));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!r) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("new_preg: Unable to allocate memory\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->all_pregs[matcher->regex_cnt - 1] = r;
|
|
|
|
return r;
|
2008-07-25 20:01:40 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
size_t len;
|
|
|
|
struct regex_list regex;
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
len = reverse_string(pattern);
|
|
|
|
regex.nxt = NULL;
|
|
|
|
regex.pattern = cli_strdup(pattern);
|
|
|
|
regex.preg = NULL;
|
|
|
|
rc = add_pattern_suffix(matcher, pattern, len, ®ex);
|
|
|
|
free(regex.pattern);
|
|
|
|
return rc;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
regex_t *preg;
|
|
|
|
size_t len;
|
|
|
|
/* we only match the host, so remove useless stuff */
|
|
|
|
const char remove_end[] = "([/?].*)?/";
|
|
|
|
const char remove_end2[] = "([/?].*)/";
|
|
|
|
|
|
|
|
len = strlen(pattern);
|
|
|
|
if (len > sizeof(remove_end)) {
|
|
|
|
if (strncmp(&pattern[len - sizeof(remove_end) + 1], remove_end, sizeof(remove_end) - 1) == 0) {
|
|
|
|
len -= sizeof(remove_end) - 1;
|
|
|
|
pattern[len++] = '/';
|
|
|
|
}
|
|
|
|
if (strncmp(&pattern[len - sizeof(remove_end2) + 1], remove_end2, sizeof(remove_end2) - 1) == 0) {
|
|
|
|
len -= sizeof(remove_end2) - 1;
|
|
|
|
pattern[len++] = '/';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pattern[len] = '\0';
|
|
|
|
|
|
|
|
preg = new_preg(matcher);
|
|
|
|
if (!preg)
|
|
|
|
return CL_EMEM;
|
|
|
|
|
|
|
|
rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void *)matcher);
|
|
|
|
if (rc) {
|
|
|
|
cli_regfree(preg);
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|