2006-09-12 19:38:39 +00:00
|
|
|
/*
|
|
|
|
* Match a string against a list of patterns/regexes.
|
|
|
|
*
|
2020-01-03 15:44:07 -05:00
|
|
|
* Copyright (C) 2013-2020 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
2019-01-25 10:15:50 -05:00
|
|
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
2008-04-02 15:24:51 +00:00
|
|
|
*
|
|
|
|
* Authors: Török Edvin
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
2008-04-02 15:24:51 +00:00
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
2007-06-30 11:50:56 +00:00
|
|
|
* published by the Free Software Foundation.
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
* MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
#include "clamav-config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CL_THREAD_SAFE
|
|
|
|
#ifndef _REENTRANT
|
|
|
|
#define _REENTRANT
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <ctype.h>
|
2007-12-18 19:23:56 +00:00
|
|
|
#include <zlib.h>
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
#include <limits.h>
|
|
|
|
#include <sys/types.h>
|
2008-07-23 13:51:57 +00:00
|
|
|
#include <assert.h>
|
|
|
|
|
2007-09-17 18:54:56 +00:00
|
|
|
#include "regex/regex.h"
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
#include "clamav.h"
|
|
|
|
#include "others.h"
|
|
|
|
#include "regex_list.h"
|
|
|
|
#include "matcher-ac.h"
|
2008-02-06 20:48:34 +00:00
|
|
|
#include "matcher.h"
|
2007-03-11 11:14:35 +00:00
|
|
|
#include "str.h"
|
2007-12-18 19:23:56 +00:00
|
|
|
#include "readdb.h"
|
2008-07-23 13:51:57 +00:00
|
|
|
#include "jsparse/textbuf.h"
|
2008-07-24 18:48:31 +00:00
|
|
|
#include "regex_suffix.h"
|
2008-12-29 17:55:30 +00:00
|
|
|
#include "default.h"
|
2009-07-17 12:49:04 +03:00
|
|
|
#include "hashtab.h"
|
2008-10-18 00:16:23 +00:00
|
|
|
|
|
|
|
#include "mpool.h"
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
/* Prototypes */
|
2008-07-29 10:36:26 +00:00
|
|
|
static regex_t *new_preg(struct regex_matcher *matcher);
|
2008-07-23 13:51:57 +00:00
|
|
|
static size_t reverse_string(char *pattern);
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *regex);
|
|
|
|
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern);
|
2008-07-23 13:51:57 +00:00
|
|
|
/* ---------- */
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2008-10-07 20:04:50 +00:00
|
|
|
#define MATCH_SUCCESS 0
|
2018-12-03 12:40:13 -05:00
|
|
|
#define MATCH_FAILED -1
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
/*
|
2016-10-19 15:57:45 -04:00
|
|
|
* Call this function when an unrecoverable error has occurred, (instead of exit).
|
2006-09-12 19:38:39 +00:00
|
|
|
*/
|
2018-12-03 12:40:13 -05:00
|
|
|
static void fatal_error(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
regex_list_done(matcher);
|
|
|
|
matcher->list_inited = -1; /* the phishing module will know we tried to load a whitelist, and failed, so it will disable itself too*/
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static inline char get_char_at_pos_with_skip(const struct pre_fixup_info *info, const char *buffer, size_t pos)
|
2007-09-27 21:27:37 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
const char *str;
|
|
|
|
size_t realpos = 0;
|
|
|
|
if (!info) {
|
|
|
|
return (pos <= strlen(buffer)) ? buffer[pos > 0 ? pos - 1 : 0] : '\0';
|
|
|
|
}
|
|
|
|
str = info->pre_displayLink.data;
|
|
|
|
cli_dbgmsg("calc_pos_with_skip: skip:%llu, %llu - %llu \"%s\",\"%s\"\n", (long long unsigned)pos, (long long unsigned)info->host_start,
|
2016-02-22 13:26:15 -05:00
|
|
|
(long long unsigned)info->host_end, str, buffer);
|
2018-12-03 12:40:13 -05:00
|
|
|
pos += info->host_start;
|
|
|
|
while (str[realpos] && !isalnum(str[realpos])) realpos++;
|
|
|
|
for (; str[realpos] && (pos > 0); pos--) {
|
|
|
|
while (str[realpos] == ' ') realpos++;
|
|
|
|
realpos++;
|
|
|
|
}
|
|
|
|
while (str[realpos] == ' ') realpos++;
|
|
|
|
cli_dbgmsg("calc_pos_with_skip:%s\n", str + realpos);
|
|
|
|
return (pos > 0 && !str[realpos]) ? '\0' : str[realpos > 0 ? realpos - 1 : 0];
|
2007-09-27 21:27:37 +00:00
|
|
|
}
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
static int validate_subdomain(const struct regex_list *regex, const struct pre_fixup_info *pre_fixup, const char *buffer, size_t buffer_len, char *real_url, size_t real_len, char *orig_real_url)
|
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
char c;
|
|
|
|
size_t match_len;
|
|
|
|
|
|
|
|
if (!regex || !regex->pattern)
|
|
|
|
return 0;
|
|
|
|
match_len = strlen(regex->pattern);
|
|
|
|
if (((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len + 1)) == ' ' || c == '\0' || c == '/' || c == '?') &&
|
|
|
|
(match_len == buffer_len || /* full match */
|
|
|
|
(match_len < buffer_len &&
|
|
|
|
((c = get_char_at_pos_with_skip(pre_fixup, buffer, buffer_len - match_len)) == '.' || (c == ' ')))
|
|
|
|
/* subdomain matched*/)) {
|
|
|
|
/* we have an extra / at the end */
|
|
|
|
if (match_len > 0) match_len--;
|
|
|
|
cli_dbgmsg("Got a match: %s with %s\n", buffer, regex->pattern);
|
|
|
|
cli_dbgmsg("Before inserting .: %s\n", orig_real_url);
|
|
|
|
if (real_len >= match_len + 1) {
|
|
|
|
const size_t pos = real_len - match_len - 1;
|
|
|
|
if (real_url[pos] != '.') {
|
|
|
|
/* we need to shift left, and insert a '.'
|
2008-07-23 13:51:57 +00:00
|
|
|
* we have an extra '.' at the beginning inserted by get_host to have room,
|
2019-06-30 13:37:27 -07:00
|
|
|
* orig_real_url has to be used here,
|
2008-07-23 13:51:57 +00:00
|
|
|
* because we want to overwrite that extra '.' */
|
2018-12-03 12:40:13 -05:00
|
|
|
size_t orig_real_len = strlen(orig_real_url);
|
|
|
|
cli_dbgmsg("No dot here:%s\n", real_url + pos);
|
|
|
|
real_url = orig_real_url;
|
|
|
|
memmove(real_url, real_url + 1, orig_real_len - match_len - 1);
|
|
|
|
real_url[orig_real_len - match_len - 1] = '.';
|
|
|
|
cli_dbgmsg("After inserting .: %s\n", real_url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
cli_dbgmsg("Ignoring false match: %s with %s, mismatched character: %c\n", buffer, regex->pattern, c);
|
|
|
|
return 0;
|
2008-07-23 13:51:57 +00:00
|
|
|
}
|
|
|
|
|
2006-09-12 19:38:39 +00:00
|
|
|
/*
|
|
|
|
* @matcher - matcher structure to use
|
|
|
|
* @real_url - href target
|
|
|
|
* @display_url - <a> tag contents
|
|
|
|
* @hostOnly - if you want to match only the host part
|
2006-10-10 23:51:49 +00:00
|
|
|
* @is_whitelist - is this a lookup in whitelist?
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* @return - CL_SUCCESS - url doesn't match
|
|
|
|
* - CL_VIRUS - url matches list
|
|
|
|
*
|
|
|
|
* Do not send NULL pointers to this function!!
|
|
|
|
*
|
|
|
|
*/
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t regex_list_match(struct regex_matcher *matcher, char *real_url, const char *display_url, const struct pre_fixup_info *pre_fixup, int hostOnly, const char **info, int is_whitelist)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
char *orig_real_url = real_url;
|
|
|
|
struct regex_list *regex;
|
|
|
|
size_t real_len, display_len, buffer_len;
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
char *buffer = NULL;
|
|
|
|
char *bufrev = NULL;
|
|
|
|
cl_error_t rc = CL_SUCCESS;
|
Comment out the filter_search call in regex_list_match
Reviewing Coverity bug reports we found that the return value to this
filter_search call was effectively being ignored, causing no filtering
to occur. Fixing this issue resulted in a unit test that uses the
following match list regex to fail when searching for `ebay.com`.:
.+\\.paypal\\.(com|de|fr|it)([/?].*)?:.+\\.ebay\\.(at|be|ca|ch|co\\.uk|de|es|fr|ie|in|it|nl|ph|pl|com(\\.(au|cn|hk|my|sg))?)/
After investigating further, this is because the regex_list_add_pattern
call, which parses the regex for suffixes and attempts to add these to
the filter, can't handle the `com(\\.(au|cn|hk|my|sg))?` portion of
the regex. As a result, it only adds `ebay.at`, `ebay.be`, `ebay.ca`, up
through `ebay.pl` into the filter). With the code returning if no filter match
is found, the `ebay.com` suffix not existing in the filter causes incoming URLs
to be treated as if there are no corresponding regexes for ebay.com, which results
in no regex rules being evaluated against it.
We should get the regex parsing code working (and ensure it handles any
other complex cases in daily.cdb) before re-enabling this code. The code
has had no effect for 12+ years at this point, though, so it's probably
safe to wait a bit longer without it.
2020-06-16 12:26:15 -04:00
|
|
|
//int filter_search_rc = 0;
|
2019-02-27 00:47:38 -05:00
|
|
|
int root;
|
|
|
|
struct cli_ac_data mdata;
|
|
|
|
struct cli_ac_result *res = NULL;
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
assert(real_url);
|
|
|
|
assert(display_url);
|
|
|
|
*info = NULL;
|
|
|
|
if (!matcher->list_inited)
|
2019-02-27 00:47:38 -05:00
|
|
|
return CL_SUCCESS;
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher->list_built);
|
|
|
|
/* skip initial '.' inserted by get_host */
|
|
|
|
if (real_url[0] == '.') real_url++;
|
|
|
|
if (display_url[0] == '.') display_url++;
|
|
|
|
real_len = strlen(real_url);
|
|
|
|
display_len = strlen(display_url);
|
|
|
|
buffer_len = (hostOnly && !is_whitelist) ? real_len + 1 : real_len + display_len + 1 + 1;
|
|
|
|
if (buffer_len < 3) {
|
|
|
|
/* too short, no match possible */
|
2019-02-27 00:47:38 -05:00
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
|
|
|
buffer = cli_malloc(buffer_len + 1);
|
|
|
|
if (!buffer) {
|
|
|
|
cli_errmsg("regex_list_match: Unable to allocate memory for buffer\n");
|
|
|
|
return CL_EMEM;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
strncpy(buffer, real_url, real_len);
|
|
|
|
buffer[real_len] = (!is_whitelist && hostOnly) ? '/' : ':';
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
/*
|
|
|
|
* For H-type PDB signatures, real_url is actually the DisplayedHostname.
|
|
|
|
* RealHostname is not used.
|
|
|
|
*/
|
|
|
|
if (!hostOnly || is_whitelist) {
|
|
|
|
/* For all other PDB and WDB signatures concatenate Real:Displayed. */
|
|
|
|
strncpy(buffer + real_len + 1, display_url, display_len);
|
|
|
|
}
|
|
|
|
buffer[buffer_len - 1] = '/';
|
|
|
|
buffer[buffer_len] = 0;
|
|
|
|
cli_dbgmsg("Looking up in regex_list: %s\n", buffer);
|
2019-06-30 13:37:27 -07:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
if (CL_SUCCESS != (rc = cli_ac_initdata(&mdata, 0, 0, 0, CLI_DEFAULT_AC_TRACKLEN)))
|
|
|
|
return rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
bufrev = cli_strdup(buffer);
|
|
|
|
if (!bufrev)
|
|
|
|
return CL_EMEM;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
reverse_string(bufrev);
|
Comment out the filter_search call in regex_list_match
Reviewing Coverity bug reports we found that the return value to this
filter_search call was effectively being ignored, causing no filtering
to occur. Fixing this issue resulted in a unit test that uses the
following match list regex to fail when searching for `ebay.com`.:
.+\\.paypal\\.(com|de|fr|it)([/?].*)?:.+\\.ebay\\.(at|be|ca|ch|co\\.uk|de|es|fr|ie|in|it|nl|ph|pl|com(\\.(au|cn|hk|my|sg))?)/
After investigating further, this is because the regex_list_add_pattern
call, which parses the regex for suffixes and attempts to add these to
the filter, can't handle the `com(\\.(au|cn|hk|my|sg))?` portion of
the regex. As a result, it only adds `ebay.at`, `ebay.be`, `ebay.ca`, up
through `ebay.pl` into the filter). With the code returning if no filter match
is found, the `ebay.com` suffix not existing in the filter causes incoming URLs
to be treated as if there are no corresponding regexes for ebay.com, which results
in no regex rules being evaluated against it.
We should get the regex parsing code working (and ensure it handles any
other complex cases in daily.cdb) before re-enabling this code. The code
has had no effect for 12+ years at this point, though, so it's probably
safe to wait a bit longer without it.
2020-06-16 12:26:15 -04:00
|
|
|
// TODO Add this back in once we improve the regex parsing code that finds
|
|
|
|
// suffixes to add to the filter.
|
|
|
|
//
|
|
|
|
// Reviewing Coverity bug reports we found that the return value to this
|
|
|
|
// filter_search call was effectively being ignored, causing no filtering
|
|
|
|
// to occur. Fixing this issue resulted in a unit test that uses the
|
|
|
|
// following match list regex to fail when searching for `ebay.com`.:
|
|
|
|
//
|
|
|
|
// .+\\.paypal\\.(com|de|fr|it)([/?].*)?:.+\\.ebay\\.(at|be|ca|ch|co\\.uk|de|es|fr|ie|in|it|nl|ph|pl|com(\\.(au|cn|hk|my|sg))?)/
|
|
|
|
//
|
|
|
|
// After investigating further, this is because the regex_list_add_pattern
|
|
|
|
// call, which parses the regex for suffixes and attempts to add these to
|
|
|
|
// the filter, can't handle the `com(\\.(au|cn|hk|my|sg))?` portion of
|
|
|
|
// the regex. As a result, it only adds `ebay.at`, `ebay.be`, `ebay.ca`, up
|
|
|
|
// through `ebay.pl` into the filter). With the commented out code below
|
|
|
|
// uncommented, these suffixes not existing in the filter are treated as
|
|
|
|
// there not being a corresponding regex for ebay.com, causing no regex
|
|
|
|
// rules to be evaluated against the URL.
|
|
|
|
//
|
|
|
|
// We should get the regex parsing code working (and ensure it handles any
|
|
|
|
// other complex cases in daily.cdb) before re-enabling this code. The code
|
|
|
|
// has had no effect for 12+ years at this point, though, so it's probably
|
|
|
|
// safe to wait a bit longer without it.
|
|
|
|
//
|
|
|
|
//filter_search_rc = filter_search(&matcher->filter, (const unsigned char *)bufrev, buffer_len);
|
|
|
|
//if (filter_search_rc == -1) {
|
|
|
|
// free(buffer);
|
|
|
|
// free(bufrev);
|
|
|
|
// /* filter says this suffix doesn't match.
|
|
|
|
// * The filter has false positives, but no false
|
|
|
|
// * negatives */
|
|
|
|
// return CL_SUCCESS;
|
|
|
|
//}
|
2019-02-27 00:47:38 -05:00
|
|
|
|
|
|
|
rc = cli_ac_scanbuff((const unsigned char *)bufrev, buffer_len, NULL, (void *)®ex, &res, &matcher->suffixes, &mdata, 0, 0, NULL, AC_SCAN_VIR, NULL);
|
|
|
|
free(bufrev);
|
|
|
|
cli_ac_freedata(&mdata);
|
|
|
|
|
|
|
|
rc = CL_SUCCESS;
|
|
|
|
root = matcher->root_regex_idx;
|
|
|
|
while (res || root) {
|
|
|
|
struct cli_ac_result *q;
|
|
|
|
if (!res) {
|
|
|
|
regex = matcher->suffix_regexes[root].head;
|
|
|
|
root = 0;
|
|
|
|
} else {
|
|
|
|
regex = res->customdata;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
while (!rc && regex) {
|
|
|
|
/* loop over multiple regexes corresponding to
|
2008-07-23 13:51:57 +00:00
|
|
|
* this suffix */
|
2019-02-27 00:47:38 -05:00
|
|
|
if (!regex->preg) {
|
|
|
|
/* we matched a static pattern */
|
|
|
|
rc = validate_subdomain(regex, pre_fixup, buffer, buffer_len, real_url, real_len, orig_real_url);
|
|
|
|
} else {
|
|
|
|
rc = !cli_regexec(regex->preg, buffer, 0, NULL, 0);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
if (rc) *info = regex->pattern;
|
|
|
|
regex = regex->nxt;
|
|
|
|
}
|
|
|
|
if (res) {
|
|
|
|
q = res;
|
|
|
|
res = res->next;
|
|
|
|
free(q);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
free(buffer);
|
|
|
|
if (!rc)
|
|
|
|
cli_dbgmsg("Lookup result: not in regex list\n");
|
|
|
|
else
|
|
|
|
cli_dbgmsg("Lookup result: in regex list\n");
|
|
|
|
return rc;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Initialization & loading */
|
2016-10-19 15:57:45 -04:00
|
|
|
/* Initializes @matcher, allocating necessary substructures */
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t init_regex_list(struct regex_matcher *matcher, uint8_t dconf_prefiltering)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2008-10-19 17:57:09 +00:00
|
|
|
#ifdef USE_MPOOL
|
2018-12-03 12:40:13 -05:00
|
|
|
mpool_t *mp = matcher->mempool;
|
2008-10-19 17:57:09 +00:00
|
|
|
#endif
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2006-10-10 23:51:49 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
memset(matcher, 0, sizeof(*matcher));
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->list_inited = 1;
|
|
|
|
matcher->list_built = 0;
|
|
|
|
matcher->list_loaded = 0;
|
|
|
|
cli_hashtab_init(&matcher->suffix_hash, 512);
|
2008-10-19 17:57:09 +00:00
|
|
|
#ifdef USE_MPOOL
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->mempool = mp;
|
|
|
|
matcher->suffixes.mempool = mp;
|
|
|
|
assert(mp && "mempool must be initialized");
|
2008-10-19 17:57:09 +00:00
|
|
|
#endif
|
2018-12-03 12:40:13 -05:00
|
|
|
if ((rc = cli_ac_init(&matcher->suffixes, 2, 32, dconf_prefiltering))) {
|
|
|
|
return rc;
|
|
|
|
}
|
2008-10-19 17:57:09 +00:00
|
|
|
#ifdef USE_MPOOL
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->sha256_hashes.mempool = mp;
|
|
|
|
matcher->hostkey_prefix.mempool = mp;
|
2008-10-19 17:57:09 +00:00
|
|
|
#endif
|
2018-12-03 12:40:13 -05:00
|
|
|
if ((rc = cli_bm_init(&matcher->sha256_hashes))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
if ((rc = cli_bm_init(&matcher->hostkey_prefix))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
filter_init(&matcher->filter);
|
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static int functionality_level_check(char *line)
|
2006-12-02 00:42:44 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
char *ptmin;
|
|
|
|
char *ptmax;
|
|
|
|
size_t j;
|
|
|
|
|
|
|
|
ptmin = strrchr(line, ':');
|
|
|
|
if (!ptmin)
|
|
|
|
return CL_SUCCESS;
|
|
|
|
|
|
|
|
ptmin++;
|
|
|
|
|
|
|
|
ptmax = strchr(ptmin, '-');
|
|
|
|
if (!ptmax)
|
|
|
|
return CL_SUCCESS; /* there is no functionality level specified, so we're ok */
|
|
|
|
else {
|
|
|
|
size_t min, max;
|
|
|
|
ptmax++;
|
|
|
|
for (j = 0; j + ptmin + 1 < ptmax; j++)
|
|
|
|
if (!isdigit(ptmin[j]))
|
|
|
|
return CL_SUCCESS; /* not numbers, not functionality level */
|
|
|
|
for (j = 0; j < strlen(ptmax); j++)
|
|
|
|
if (!isdigit(ptmax[j]))
|
|
|
|
return CL_SUCCESS; /* see above */
|
|
|
|
ptmax[-1] = '\0';
|
|
|
|
min = atoi(ptmin);
|
|
|
|
if (strlen(ptmax) == 0)
|
|
|
|
max = INT_MAX;
|
|
|
|
else
|
|
|
|
max = atoi(ptmax);
|
|
|
|
|
|
|
|
if (min > cl_retflevel()) {
|
|
|
|
cli_dbgmsg("regex list line %s not loaded (required f-level: %u)\n", line, (unsigned int)min);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (max < cl_retflevel())
|
|
|
|
return CL_EMALFDB;
|
|
|
|
ptmin[-1] = '\0';
|
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
2008-08-01 14:49:55 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static int add_hash(struct regex_matcher *matcher, char *pattern, const char fl, int is_prefix)
|
2008-08-01 14:49:55 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
int rc;
|
2019-05-03 18:16:03 -04:00
|
|
|
struct cli_bm_patt *pat = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*pat));
|
2018-12-03 12:40:13 -05:00
|
|
|
struct cli_matcher *bm;
|
|
|
|
const char *vname = NULL;
|
|
|
|
if (!pat)
|
|
|
|
return CL_EMEM;
|
2019-05-03 18:16:03 -04:00
|
|
|
pat->pattern = (unsigned char *)CLI_MPOOL_HEX2STR(matcher->mempool, pattern);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!pat->pattern)
|
|
|
|
return CL_EMALFDB;
|
|
|
|
pat->length = 32;
|
|
|
|
if (is_prefix) {
|
|
|
|
pat->length = 4;
|
|
|
|
bm = &matcher->hostkey_prefix;
|
|
|
|
} else {
|
|
|
|
bm = &matcher->sha256_hashes;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!matcher->sha256_pfx_set.keys) {
|
|
|
|
if ((rc = cli_hashset_init(&matcher->sha256_pfx_set, 1048576, 90))) {
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fl != 'W' && pat->length == 32 &&
|
|
|
|
cli_hashset_contains(&matcher->sha256_pfx_set, cli_readint32(pat->pattern)) &&
|
|
|
|
cli_bm_scanbuff(pat->pattern, 32, &vname, NULL, &matcher->sha256_hashes, 0, NULL, NULL, NULL) == CL_VIRUS) {
|
|
|
|
if (*vname == 'W') {
|
|
|
|
/* hash is whitelisted in local.gdb */
|
|
|
|
cli_dbgmsg("Skipping hash %s\n", pattern);
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, pat->pattern);
|
|
|
|
MPOOL_FREE(matcher->mempool, pat);
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
2019-05-03 18:16:03 -04:00
|
|
|
pat->virname = MPOOL_MALLOC(matcher->mempool, 1);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!pat->virname) {
|
|
|
|
free(pat);
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("add_hash: Unable to allocate memory for path->virname\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_EMEM;
|
|
|
|
}
|
|
|
|
*pat->virname = fl;
|
|
|
|
cli_hashset_addkey(&matcher->sha256_pfx_set, cli_readint32(pat->pattern));
|
|
|
|
if ((rc = cli_bm_addpatt(bm, pat, "*"))) {
|
|
|
|
cli_errmsg("add_hash: failed to add BM pattern\n");
|
|
|
|
free(pat->pattern);
|
|
|
|
free(pat->virname);
|
|
|
|
free(pat);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
return CL_SUCCESS;
|
2006-12-02 00:42:44 +00:00
|
|
|
}
|
|
|
|
|
2006-09-12 19:38:39 +00:00
|
|
|
/* Load patterns/regexes from file */
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t load_regex_matcher(struct cl_engine *engine, struct regex_matcher *matcher, FILE *fd, unsigned int *signo, unsigned int options, int is_whitelist, struct cli_dbio *dbio, uint8_t dconf_prefiltering)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
|
|
|
int line = 0, entry = 0;
|
2018-12-03 12:40:13 -05:00
|
|
|
char buffer[FILEBUFF];
|
|
|
|
|
|
|
|
assert(matcher);
|
|
|
|
|
|
|
|
if (matcher->list_inited == -1)
|
|
|
|
return CL_EMALFDB; /* already failed to load */
|
|
|
|
if (!fd && !dbio) {
|
|
|
|
cli_errmsg("Unable to load regex list (null file)\n");
|
|
|
|
return CL_ENULLARG;
|
|
|
|
}
|
|
|
|
|
|
|
|
cli_dbgmsg("Loading regex_list\n");
|
|
|
|
if (!matcher->list_inited) {
|
|
|
|
rc = init_regex_list(matcher, dconf_prefiltering);
|
|
|
|
if (!matcher->list_inited) {
|
|
|
|
cli_errmsg("Regex list failed to initialize!\n");
|
|
|
|
fatal_error(matcher);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
2006-09-12 19:38:39 +00:00
|
|
|
* Regexlist db format (common to .wdb(whitelist) and .pdb(domainlist) files:
|
|
|
|
* Multiple lines of form, (empty lines are skipped):
|
|
|
|
* Flags RealURL DisplayedURL
|
|
|
|
* Where:
|
2019-06-30 13:37:27 -07:00
|
|
|
* Flags:
|
2007-03-18 23:27:15 +00:00
|
|
|
*
|
|
|
|
* .pdb files:
|
2019-06-30 13:37:27 -07:00
|
|
|
* R - regex, H - host-only, followed by (optional) 3-digit hexnumber representing
|
2006-09-12 19:38:39 +00:00
|
|
|
* flags that should be filtered.
|
|
|
|
* [i.e. phishcheck urls.flags that we don't want to be done for this particular host]
|
2019-06-30 13:37:27 -07:00
|
|
|
*
|
2007-03-18 23:27:15 +00:00
|
|
|
* .wdb files:
|
2019-06-30 13:37:27 -07:00
|
|
|
* X - full URL regex
|
2007-03-18 23:27:15 +00:00
|
|
|
* Y - host-only regex
|
|
|
|
* M - host simple pattern
|
2006-09-12 19:38:39 +00:00
|
|
|
*
|
|
|
|
* If a line in the file doesn't conform to this format, loading fails
|
2019-06-30 13:37:27 -07:00
|
|
|
*
|
2006-09-12 19:38:39 +00:00
|
|
|
*/
|
2018-12-03 12:40:13 -05:00
|
|
|
while (cli_dbgets(buffer, FILEBUFF, fd, dbio)) {
|
|
|
|
char *pattern;
|
|
|
|
char *flags;
|
|
|
|
size_t pattern_len;
|
|
|
|
|
|
|
|
cli_chomp(buffer);
|
|
|
|
line++;
|
|
|
|
if (!*buffer)
|
|
|
|
continue; /* skip empty lines */
|
|
|
|
|
|
|
|
if (functionality_level_check(buffer))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (engine->cb_sigload && engine->cb_sigload("phishing", buffer, ~options & CL_DB_OFFICIAL, engine->cb_sigload_ctx)) {
|
|
|
|
cli_dbgmsg("load_regex_matcher: skipping %s due to callback\n", buffer);
|
|
|
|
continue;
|
|
|
|
}
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
entry++;
|
|
|
|
pattern = strchr(buffer, ':');
|
|
|
|
if (!pattern) {
|
|
|
|
cli_errmsg("Malformed regex list line %d\n", line);
|
|
|
|
fatal_error(matcher);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
/*pattern[0]='\0';*/
|
|
|
|
flags = buffer + 1;
|
|
|
|
pattern++;
|
|
|
|
|
|
|
|
pattern_len = strlen(pattern);
|
|
|
|
if (pattern_len < FILEBUFF) {
|
|
|
|
pattern[pattern_len] = '/';
|
|
|
|
pattern[pattern_len + 1] = '\0';
|
|
|
|
} else {
|
|
|
|
cli_errmsg("Overlong regex line %d\n", line);
|
|
|
|
fatal_error(matcher);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((buffer[0] == 'R' && !is_whitelist) || ((buffer[0] == 'X' || buffer[0] == 'Y') && is_whitelist)) {
|
|
|
|
/* regex for hostname*/
|
|
|
|
if ((rc = regex_list_add_pattern(matcher, pattern)))
|
|
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
|
|
} else if ((buffer[0] == 'H' && !is_whitelist) || (buffer[0] == 'M' && is_whitelist)) {
|
|
|
|
/*matches displayed host*/
|
|
|
|
if ((rc = add_static_pattern(matcher, pattern)))
|
|
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
|
|
} else if (buffer[0] == 'S' && (!is_whitelist || pattern[0] == 'W')) {
|
|
|
|
pattern[pattern_len] = '\0';
|
|
|
|
if (pattern[0] == 'W')
|
|
|
|
flags[0] = 'W';
|
|
|
|
if ((pattern[0] == 'W' || pattern[0] == 'F' || pattern[0] == 'P') && pattern[1] == ':') {
|
|
|
|
pattern += 2;
|
|
|
|
if ((rc = add_hash(matcher, pattern, flags[0], pattern[-2] == 'P'))) {
|
|
|
|
cli_errmsg("Error loading at line: %d\n", line);
|
|
|
|
return rc == CL_EMEM ? CL_EMEM : CL_EMALFDB;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
cli_errmsg("Error loading line: %d, %c\n", line, *pattern);
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return CL_EMALFDB;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
matcher->list_loaded = 1;
|
|
|
|
if (signo)
|
|
|
|
*signo += entry;
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
2006-09-12 19:38:39 +00:00
|
|
|
|
|
|
|
/* Build the matcher list */
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t cli_build_regex_list(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!matcher)
|
|
|
|
return CL_SUCCESS;
|
|
|
|
if (!matcher->list_inited || !matcher->list_loaded) {
|
|
|
|
cli_errmsg("Regex list not loaded!\n");
|
|
|
|
return -1; /*TODO: better error code */
|
|
|
|
}
|
|
|
|
cli_dbgmsg("Building regex list\n");
|
|
|
|
cli_hashtab_free(&matcher->suffix_hash);
|
|
|
|
if ((rc = cli_ac_buildtrie(&matcher->suffixes)))
|
|
|
|
return rc;
|
|
|
|
matcher->list_built = 1;
|
|
|
|
cli_hashset_destroy(&matcher->sha256_pfx_set);
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Done with this matcher, free resources */
|
2018-12-03 12:40:13 -05:00
|
|
|
void regex_list_done(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
|
|
|
|
if (matcher->list_inited == 1) {
|
|
|
|
size_t i;
|
|
|
|
cli_ac_free(&matcher->suffixes);
|
|
|
|
if (matcher->suffix_regexes) {
|
|
|
|
for (i = 0; i < matcher->suffix_cnt; i++) {
|
|
|
|
struct regex_list *r = matcher->suffix_regexes[i].head;
|
|
|
|
while (r) {
|
|
|
|
struct regex_list *q = r;
|
|
|
|
r = r->nxt;
|
|
|
|
free(q->pattern);
|
|
|
|
free(q);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(matcher->suffix_regexes);
|
|
|
|
matcher->suffix_regexes = NULL;
|
|
|
|
}
|
|
|
|
if (matcher->all_pregs) {
|
|
|
|
for (i = 0; i < matcher->regex_cnt; i++) {
|
|
|
|
regex_t *r = matcher->all_pregs[i];
|
|
|
|
cli_regfree(r);
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, r);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, matcher->all_pregs);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
cli_hashtab_free(&matcher->suffix_hash);
|
|
|
|
cli_bm_free(&matcher->sha256_hashes);
|
|
|
|
cli_bm_free(&matcher->hostkey_prefix);
|
|
|
|
}
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
int is_regex_ok(struct regex_matcher *matcher)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
return (!matcher->list_inited || matcher->list_inited != -1); /* either we don't have a regexlist, or we initialized it successfully */
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2008-07-24 18:48:31 +00:00
|
|
|
static int add_newsuffix(struct regex_matcher *matcher, struct regex_list *info, const char *suffix, size_t len)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct cli_matcher *root = &matcher->suffixes;
|
2019-05-03 18:16:03 -04:00
|
|
|
struct cli_ac_patt *new = MPOOL_CALLOC(matcher->mempool, 1, sizeof(*new));
|
2018-12-03 12:40:13 -05:00
|
|
|
size_t i;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!new)
|
|
|
|
return CL_EMEM;
|
|
|
|
assert(root && suffix);
|
|
|
|
|
|
|
|
new->rtype = 0;
|
|
|
|
new->type = 0;
|
|
|
|
new->sigid = 0;
|
|
|
|
new->parts = 0;
|
|
|
|
new->partno = 0;
|
|
|
|
new->mindist = 0;
|
|
|
|
new->maxdist = 0;
|
|
|
|
new->offset_min = CLI_OFF_ANY;
|
|
|
|
new->length[0] = (uint16_t)len;
|
|
|
|
|
|
|
|
new->ch[0] = new->ch[1] |= CLI_MATCH_IGNORE;
|
|
|
|
if (new->length[0] > root->maxpatlen)
|
|
|
|
root->maxpatlen = new->length[0];
|
|
|
|
|
2019-05-03 18:16:03 -04:00
|
|
|
new->pattern = MPOOL_MALLOC(matcher->mempool, sizeof(new->pattern[0]) * len);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!new->pattern) {
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, new);
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("add_newsuffix: Unable to allocate memory for new->pattern\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_EMEM;
|
|
|
|
}
|
|
|
|
for (i = 0; i < len; i++)
|
|
|
|
new->pattern[i] = suffix[i]; /*new->pattern is short int* */
|
|
|
|
|
|
|
|
new->customdata = info;
|
|
|
|
new->virname = NULL;
|
|
|
|
if ((ret = cli_ac_addpatt(root, new))) {
|
2019-05-03 18:16:03 -04:00
|
|
|
MPOOL_FREE(matcher->mempool, new->pattern);
|
|
|
|
MPOOL_FREE(matcher->mempool, new);
|
2018-12-03 12:40:13 -05:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
filter_add_static(&matcher->filter, (const unsigned char *)suffix, len, "regex");
|
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
#define MODULE "regex_list: "
|
|
|
|
/* ------ load a regex, determine suffix, determine suffix2regexlist map ---- */
|
2006-09-12 19:38:39 +00:00
|
|
|
|
2008-07-29 15:37:23 +00:00
|
|
|
static void list_add_tail(struct regex_list_ht *ht, struct regex_list *regex)
|
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!ht->head)
|
|
|
|
ht->head = regex;
|
|
|
|
if (ht->tail) {
|
|
|
|
ht->tail->nxt = regex;
|
|
|
|
}
|
|
|
|
ht->tail = regex;
|
2008-07-29 15:37:23 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t add_pattern_suffix(void *cbdata, const char *suffix, size_t suffix_len, const struct regex_list *iregex)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct regex_matcher *matcher = cbdata;
|
|
|
|
struct regex_list *regex = cli_malloc(sizeof(*regex));
|
|
|
|
const struct cli_element *el;
|
|
|
|
void *tmp_matcher; /* save original address if OOM occurs */
|
2008-07-23 13:51:57 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
assert(matcher);
|
|
|
|
if (!regex) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("add_pattern_suffix: Unable to allocate memory for regex\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_EMEM;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
regex->pattern = iregex->pattern ? cli_strdup(iregex->pattern) : NULL;
|
|
|
|
regex->preg = iregex->preg;
|
|
|
|
regex->nxt = NULL;
|
|
|
|
el = cli_hashtab_find(&matcher->suffix_hash, suffix, suffix_len);
|
|
|
|
/* TODO: what if suffixes are prefixes of eachother and only one will
|
2008-07-23 13:51:57 +00:00
|
|
|
* match? */
|
2018-12-03 12:40:13 -05:00
|
|
|
if (el) {
|
|
|
|
/* existing suffix */
|
|
|
|
assert((size_t)el->data < matcher->suffix_cnt);
|
|
|
|
list_add_tail(&matcher->suffix_regexes[el->data], regex);
|
|
|
|
} else {
|
|
|
|
/* new suffix */
|
|
|
|
size_t n = matcher->suffix_cnt++;
|
|
|
|
el = cli_hashtab_insert(&matcher->suffix_hash, suffix, suffix_len, n);
|
|
|
|
tmp_matcher = matcher->suffix_regexes; /* save the current value before cli_realloc() */
|
|
|
|
tmp_matcher = cli_realloc(matcher->suffix_regexes, (n + 1) * sizeof(*matcher->suffix_regexes));
|
|
|
|
if (!tmp_matcher) {
|
|
|
|
free(regex);
|
|
|
|
return CL_EMEM;
|
|
|
|
}
|
|
|
|
matcher->suffix_regexes = tmp_matcher; /* success, point at new memory location */
|
|
|
|
matcher->suffix_regexes[n].tail = regex;
|
|
|
|
matcher->suffix_regexes[n].head = regex;
|
|
|
|
if (suffix[0] == '/' && suffix[1] == '\0')
|
|
|
|
matcher->root_regex_idx = n;
|
|
|
|
add_newsuffix(matcher, regex, suffix, suffix_len);
|
|
|
|
}
|
2019-02-27 00:47:38 -05:00
|
|
|
return CL_SUCCESS;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2008-07-23 13:51:57 +00:00
|
|
|
/*
 * Reverse a NUL-terminated string in place.
 *
 * pattern - string to reverse; a NULL pointer is treated as an empty string.
 *
 * Returns the length of the string, not counting the terminator (0 for NULL).
 */
static size_t reverse_string(char *pattern)
{
    size_t len;
    size_t lo;
    size_t hi;

    if (pattern == NULL) {
        return 0;
    }

    len = strlen(pattern);
    if (len < 2) {
        /* Nothing to swap; also keeps `len - 1` below from wrapping. */
        return len;
    }

    /* Walk inwards from both ends, swapping as we go. */
    for (lo = 0, hi = len - 1; lo < hi; lo++, hi--) {
        char aux    = pattern[lo];
        pattern[lo] = pattern[hi];
        pattern[hi] = aux;
    }
    return len;
}
|
|
|
|
|
2008-07-29 10:36:26 +00:00
|
|
|
static regex_t *new_preg(struct regex_matcher *matcher)
|
2008-07-25 20:01:40 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
regex_t *r;
|
2019-05-03 18:16:03 -04:00
|
|
|
matcher->all_pregs = MPOOL_REALLOC(matcher->mempool, matcher->all_pregs, ++matcher->regex_cnt * sizeof(*matcher->all_pregs));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!matcher->all_pregs) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("new_preg: Unable to reallocate memory\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2019-05-03 18:16:03 -04:00
|
|
|
r = MPOOL_MALLOC(matcher->mempool, sizeof(*r));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!r) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("new_preg: Unable to allocate memory\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
matcher->all_pregs[matcher->regex_cnt - 1] = r;
|
|
|
|
return r;
|
2008-07-25 20:01:40 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t add_static_pattern(struct regex_matcher *matcher, char *pattern)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
size_t len;
|
|
|
|
struct regex_list regex;
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
len = reverse_string(pattern);
|
|
|
|
regex.nxt = NULL;
|
|
|
|
regex.pattern = cli_strdup(pattern);
|
|
|
|
regex.preg = NULL;
|
|
|
|
rc = add_pattern_suffix(matcher, pattern, len, ®ex);
|
|
|
|
free(regex.pattern);
|
|
|
|
return rc;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t regex_list_add_pattern(struct regex_matcher *matcher, char *pattern)
|
2006-09-12 19:38:39 +00:00
|
|
|
{
|
2019-02-27 00:47:38 -05:00
|
|
|
cl_error_t rc;
|
2018-12-03 12:40:13 -05:00
|
|
|
regex_t *preg;
|
|
|
|
size_t len;
|
|
|
|
/* we only match the host, so remove useless stuff */
|
|
|
|
const char remove_end[] = "([/?].*)?/";
|
|
|
|
const char remove_end2[] = "([/?].*)/";
|
|
|
|
|
|
|
|
len = strlen(pattern);
|
|
|
|
if (len > sizeof(remove_end)) {
|
|
|
|
if (strncmp(&pattern[len - sizeof(remove_end) + 1], remove_end, sizeof(remove_end) - 1) == 0) {
|
|
|
|
len -= sizeof(remove_end) - 1;
|
|
|
|
pattern[len++] = '/';
|
|
|
|
}
|
|
|
|
if (strncmp(&pattern[len - sizeof(remove_end2) + 1], remove_end2, sizeof(remove_end2) - 1) == 0) {
|
|
|
|
len -= sizeof(remove_end2) - 1;
|
|
|
|
pattern[len++] = '/';
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pattern[len] = '\0';
|
|
|
|
|
|
|
|
preg = new_preg(matcher);
|
|
|
|
if (!preg)
|
|
|
|
return CL_EMEM;
|
|
|
|
|
|
|
|
rc = cli_regex2suffix(pattern, preg, add_pattern_suffix, (void *)matcher);
|
|
|
|
if (rc) {
|
|
|
|
cli_regfree(preg);
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
2006-09-12 19:38:39 +00:00
|
|
|
}
|