2008-07-24 18:48:31 +00:00
|
|
|
/*
 *  Parse a regular expression, and extract a static suffix.
 *
 *  Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *  Copyright (C) 2007-2013 Sourcefire, Inc.
 *
 *  Authors: Török Edvin
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *  MA 02110-1301, USA.
 */
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
#include "clamav-config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
2014-07-01 19:38:01 -04:00
|
|
|
#include "clamav.h"
|
2008-07-24 18:48:31 +00:00
|
|
|
#include "others.h"
|
|
|
|
#include "jsparse/textbuf.h"
|
|
|
|
#include "regex_suffix.h"
|
|
|
|
#define MODULE "regex_suffix: "
|
|
|
|
|
|
|
|
/*
 * Kinds of nodes in the parsed regex tree.
 *
 * There is no dedicated node for "one or more": (x)+ is rewritten by the
 * parser as (x)*(x).
 */
enum node_type {
    root       = 0, /* sentinel placed above the parsed tree */
    concat     = 1, /* left followed by right */
    alternate  = 2, /* left | right */
    optional   = 3, /* ?, * */
    leaf       = 4, /* a single character */
    leaf_class = 5  /* a character class (bitmap of accepted bytes) */
};
|
|
|
|
|
|
|
|
struct node {
|
2018-12-03 12:40:13 -05:00
|
|
|
enum node_type type; /* must be first field */
|
|
|
|
struct node *parent;
|
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
struct node *left;
|
|
|
|
struct node *right;
|
|
|
|
} children;
|
|
|
|
uint8_t *leaf_class_bitmap;
|
|
|
|
uint8_t leaf_char;
|
|
|
|
} u;
|
2008-07-24 18:48:31 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* --- Prototypes --*/
|
2019-02-27 00:47:38 -05:00
|
|
|
static cl_error_t build_suffixtree_descend(struct node *n, struct text_buffer *buf, suffix_callback cb, void *cbdata, struct regex_list *regex);
|
2008-07-24 18:48:31 +00:00
|
|
|
/* -----------------*/
|
|
|
|
|
2008-08-04 12:44:16 +00:00
|
|
|
/*
 * Character-class bitmap with every bit set, i.e. "matches any byte".
 * Shared by all '.' nodes; destroy_tree() compares against this address so
 * that it is never passed to free().
 */
static uint8_t dot_bitmap[32] = {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
|
2008-07-24 18:48:31 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static struct node *make_node(enum node_type type, struct node *left, struct node *right)
|
2008-07-24 18:48:31 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct node *n;
|
|
|
|
if (type == concat) {
|
|
|
|
if (left == NULL)
|
|
|
|
return right;
|
|
|
|
if (right == NULL)
|
|
|
|
return left;
|
|
|
|
}
|
2022-05-08 14:59:09 -07:00
|
|
|
n = malloc(sizeof(*n));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!n) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("make_node: Unable to allocate memory for new node\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
n->type = type;
|
|
|
|
n->parent = NULL;
|
|
|
|
n->u.children.left = left;
|
|
|
|
n->u.children.right = right;
|
|
|
|
if (left)
|
|
|
|
left->parent = n;
|
|
|
|
if (right)
|
|
|
|
right->parent = n;
|
|
|
|
return n;
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct node *dup_node(struct node *p)
|
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct node *node_left, *node_right;
|
|
|
|
struct node *d;
|
2008-07-24 18:48:31 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!p)
|
|
|
|
return NULL;
|
2022-05-08 14:59:09 -07:00
|
|
|
d = malloc(sizeof(*d));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!d) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("dup_node: Unable to allocate memory for duplicate node\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
d->type = p->type;
|
|
|
|
d->parent = NULL;
|
|
|
|
switch (p->type) {
|
|
|
|
case leaf:
|
|
|
|
d->u.leaf_char = p->u.leaf_char;
|
|
|
|
break;
|
|
|
|
case leaf_class:
|
2022-05-08 14:59:09 -07:00
|
|
|
d->u.leaf_class_bitmap = malloc(32);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!d->u.leaf_class_bitmap) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("make_node: Unable to allocate memory for leaf class\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
free(d);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
memcpy(d->u.leaf_class_bitmap, p->u.leaf_class_bitmap, 32);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
node_left = dup_node(p->u.children.left);
|
|
|
|
node_right = dup_node(p->u.children.right);
|
|
|
|
d->u.children.left = node_left;
|
|
|
|
d->u.children.right = node_right;
|
|
|
|
if (node_left)
|
|
|
|
node_left->parent = d;
|
|
|
|
if (node_right)
|
|
|
|
node_right->parent = d;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return d;
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct node *make_charclass(uint8_t *bitmap)
|
|
|
|
{
|
2023-02-03 15:04:04 -05:00
|
|
|
struct node *v = NULL;
|
|
|
|
if (NULL == bitmap) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2022-05-08 14:59:09 -07:00
|
|
|
v = malloc(sizeof(*v));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!v) {
|
2013-03-01 13:51:15 -05:00
|
|
|
cli_errmsg("make_charclass: Unable to allocate memory for character class\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2013-03-01 13:51:15 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
v->type = leaf_class;
|
|
|
|
v->parent = NULL;
|
|
|
|
v->u.leaf_class_bitmap = bitmap;
|
|
|
|
return v;
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct node *make_leaf(char c)
|
|
|
|
{
|
2022-05-08 14:59:09 -07:00
|
|
|
struct node *v = malloc(sizeof(*v));
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!v)
|
|
|
|
return NULL;
|
|
|
|
v->type = leaf;
|
|
|
|
v->parent = NULL;
|
|
|
|
v->u.leaf_char = c;
|
|
|
|
return v;
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void destroy_tree(struct node *n)
|
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!n)
|
|
|
|
return;
|
|
|
|
switch (n->type) {
|
|
|
|
case concat:
|
|
|
|
case alternate:
|
|
|
|
case optional:
|
|
|
|
destroy_tree(n->u.children.left);
|
|
|
|
destroy_tree(n->u.children.right);
|
|
|
|
break;
|
|
|
|
case leaf_class:
|
|
|
|
if (n->u.leaf_class_bitmap != dot_bitmap)
|
2024-01-09 17:44:33 -05:00
|
|
|
CLI_FREE_AND_SET_NULL(n->u.leaf_class_bitmap);
|
2018-12-03 12:40:13 -05:00
|
|
|
break;
|
|
|
|
case root:
|
|
|
|
case leaf:
|
|
|
|
break;
|
|
|
|
}
|
2024-01-09 17:44:33 -05:00
|
|
|
CLI_FREE_AND_SET_NULL(n);
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
2023-02-03 15:04:04 -05:00
|
|
|
static uint8_t *parse_char_class(const uint8_t *pat, size_t patSize, size_t *pos)
|
2008-07-24 18:48:31 +00:00
|
|
|
{
|
2023-02-03 15:04:04 -05:00
|
|
|
|
|
|
|
#ifndef INC_POS
|
|
|
|
#define ADD_POS(posPtr, incVal, posMax) \
|
|
|
|
{ \
|
|
|
|
do { \
|
|
|
|
if (((*posPtr) + incVal) >= posMax) { \
|
|
|
|
cli_warnmsg("parse_char_class: Invalid char class\n"); \
|
2024-01-09 19:41:17 -05:00
|
|
|
CLI_FREE_AND_SET_NULL(bitmap); \
|
2023-02-03 15:04:04 -05:00
|
|
|
goto done; \
|
|
|
|
} \
|
|
|
|
(*posPtr)++; \
|
|
|
|
} while (0); \
|
|
|
|
}
|
|
|
|
|
|
|
|
#define INC_POS(posPtr, posMax) ADD_POS(posPtr, 1, posMax)
|
|
|
|
#endif
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
unsigned char range_start = 0;
|
|
|
|
int hasprev = 0;
|
2022-05-27 19:51:18 -04:00
|
|
|
uint8_t *bitmap = NULL;
|
|
|
|
|
2024-01-09 17:44:33 -05:00
|
|
|
CLI_MALLOC_OR_GOTO_DONE(bitmap, 32,
|
2024-01-09 19:41:17 -05:00
|
|
|
cli_errmsg("parse_char_class: Unable to allocate memory for bitmap\n"));
|
2022-05-27 19:51:18 -04:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
if (pat[*pos] == '^') {
|
|
|
|
memset(bitmap, 0xFF, 32); /*match chars not in brackets*/
|
2023-02-03 15:04:04 -05:00
|
|
|
INC_POS(pos, patSize);
|
2018-12-03 12:40:13 -05:00
|
|
|
} else
|
|
|
|
memset(bitmap, 0x00, 32);
|
|
|
|
do {
|
|
|
|
/* literal ] can be first character, so test for it at the end of the loop, for example: []] */
|
|
|
|
if (pat[*pos] == '-' && hasprev) {
|
|
|
|
/* it is a range*/
|
|
|
|
unsigned char range_end;
|
|
|
|
unsigned int c;
|
2022-05-16 21:29:25 -04:00
|
|
|
if (0 == range_start) {
|
2024-01-09 17:44:33 -05:00
|
|
|
CLI_FREE_AND_SET_NULL(bitmap);
|
2023-02-03 15:04:04 -05:00
|
|
|
cli_errmsg("parse_char_class: range_start not initialized\n");
|
2022-05-27 19:51:18 -04:00
|
|
|
goto done;
|
2022-05-16 21:29:25 -04:00
|
|
|
}
|
2023-02-03 15:04:04 -05:00
|
|
|
INC_POS(pos, patSize);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (pat[*pos] == '[')
|
|
|
|
if (pat[*pos + 1] == '.') {
|
|
|
|
/* collating sequence not handled */
|
2024-01-09 17:44:33 -05:00
|
|
|
CLI_FREE_AND_SET_NULL(bitmap);
|
2018-12-03 12:40:13 -05:00
|
|
|
/* we are parsing the regex for a
|
2022-02-16 00:13:55 +01:00
|
|
|
* filter, be conservative and
|
|
|
|
* tell the filter that anything could
|
|
|
|
* match here */
|
2023-02-03 15:04:04 -05:00
|
|
|
while (pat[*pos] != ']') INC_POS(pos, patSize);
|
|
|
|
INC_POS(pos, patSize);
|
|
|
|
while (pat[*pos] != ']') INC_POS(pos, patSize);
|
2018-12-03 12:40:13 -05:00
|
|
|
return dot_bitmap;
|
|
|
|
} else
|
|
|
|
range_end = pat[*pos];
|
|
|
|
else
|
|
|
|
range_end = pat[*pos];
|
|
|
|
for (c = range_start + 1; c <= range_end; c++)
|
|
|
|
bitmap[c >> 3] ^= 1 << (c & 0x7);
|
|
|
|
hasprev = 0;
|
|
|
|
} else if (pat[*pos] == '[' && pat[*pos] == ':') {
|
|
|
|
/* char class */
|
2024-01-09 17:44:33 -05:00
|
|
|
CLI_FREE_AND_SET_NULL(bitmap);
|
2023-02-03 15:04:04 -05:00
|
|
|
while (pat[*pos] != ']') INC_POS(pos, patSize);
|
|
|
|
INC_POS(pos, patSize);
|
|
|
|
while (pat[*pos] != ']') INC_POS(pos, patSize);
|
2018-12-03 12:40:13 -05:00
|
|
|
return dot_bitmap;
|
|
|
|
} else {
|
|
|
|
bitmap[pat[*pos] >> 3] ^= 1 << (pat[*pos] & 0x7);
|
|
|
|
range_start = pat[*pos];
|
2023-02-03 15:04:04 -05:00
|
|
|
INC_POS(pos, patSize);
|
2018-12-03 12:40:13 -05:00
|
|
|
hasprev = 1;
|
|
|
|
}
|
|
|
|
} while (pat[*pos] != ']');
|
2022-05-27 19:51:18 -04:00
|
|
|
|
|
|
|
done:
|
2018-12-03 12:40:13 -05:00
|
|
|
return bitmap;
|
2023-02-03 15:04:04 -05:00
|
|
|
|
|
|
|
#undef ADD_POS
|
|
|
|
#undef INC_POS
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
2023-02-03 15:04:04 -05:00
|
|
|
static struct node *parse_regex(const uint8_t *p, const size_t pSize, size_t *last)
|
2008-07-24 18:48:31 +00:00
|
|
|
{
|
2018-12-03 12:40:13 -05:00
|
|
|
struct node *v = NULL;
|
|
|
|
struct node *right;
|
|
|
|
struct node *tmp;
|
2008-07-24 18:48:31 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
while (p[*last] != '$' && p[*last] != '\0') {
|
|
|
|
switch (p[*last]) {
|
|
|
|
case '|':
|
|
|
|
++*last;
|
2023-02-03 15:04:04 -05:00
|
|
|
right = parse_regex(p, pSize, last);
|
2018-12-03 12:40:13 -05:00
|
|
|
v = make_node(alternate, v, right);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!v) {
|
|
|
|
destroy_tree(right);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
break;
|
|
|
|
case '*':
|
|
|
|
case '?':
|
|
|
|
v = make_node(optional, v, NULL);
|
|
|
|
if (!v)
|
|
|
|
return NULL;
|
|
|
|
++*last;
|
|
|
|
break;
|
|
|
|
case '+':
|
|
|
|
/* (x)* */
|
|
|
|
tmp = make_node(optional, v, NULL);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!tmp) {
|
|
|
|
destroy_tree(v);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
/* (x) */
|
|
|
|
right = dup_node(v);
|
2018-11-14 16:58:30 -05:00
|
|
|
if (!right) {
|
2022-05-27 19:51:18 -04:00
|
|
|
destroy_tree(tmp);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2018-11-14 16:58:30 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
/* (x)*(x) => (x)+ */
|
|
|
|
v = make_node(concat, tmp, right);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!v) {
|
|
|
|
destroy_tree(right);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
++*last;
|
|
|
|
break;
|
|
|
|
case '(':
|
|
|
|
++*last;
|
2023-02-03 15:04:04 -05:00
|
|
|
right = parse_regex(p, pSize, last);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!right) {
|
|
|
|
destroy_tree(v);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
++*last;
|
|
|
|
v = make_node(concat, v, right);
|
|
|
|
break;
|
|
|
|
case ')':
|
|
|
|
return v;
|
|
|
|
case '.':
|
|
|
|
right = make_charclass(dot_bitmap);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!right) {
|
|
|
|
destroy_tree(v);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
v = make_node(concat, v, right);
|
|
|
|
if (!v)
|
|
|
|
return NULL;
|
|
|
|
++*last;
|
|
|
|
break;
|
|
|
|
case '[':
|
|
|
|
++*last;
|
2023-02-03 15:04:04 -05:00
|
|
|
right = make_charclass(parse_char_class(p, pSize, last));
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!right) {
|
|
|
|
destroy_tree(v);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
v = make_node(concat, v, right);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!v) {
|
|
|
|
destroy_tree(right);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
++*last;
|
|
|
|
break;
|
|
|
|
case '\\':
|
|
|
|
/* next char is escaped, advance pointer
|
2022-02-16 00:13:55 +01:00
|
|
|
* and let fall-through handle it */
|
2018-12-03 12:40:13 -05:00
|
|
|
++*last;
|
PE, ELF, Mach-O: code cleanup
The header parsing / executable metadata collecting functions for the
PE, ELF, and Mach-O file types were using `int` for the return type.
Mostly they were returning 0 for success and -1, -2, -3, or -4 for
failure. But in some cases they were returning cl_error_t enum values
for failure. Regardless, the function using them was treating 0 as
success and non-zero as failure, which it stored as -1 ... every time.
This commit switches them all to use cl_error_t. I am continuing to
storeo the final result as 0 / -1 in the `peinfo` struct, but outside of
that everything has been made consistent.
While I was working on that, I got a tad side tracked. I noticed that
the target type isn't an enum, or even a set of #defines. So I made an
enum and then changed the code that uses target types to use the enum.
I also removed the `target` parameter from a number of functions that
don't actually use it at all. Some recursion was masking the fact that
it was an unused parameter which is why there was no warning about it.
2022-08-28 18:41:04 -07:00
|
|
|
/* fall-through */
|
2018-12-03 12:40:13 -05:00
|
|
|
default:
|
|
|
|
right = make_leaf(p[*last]);
|
|
|
|
v = make_node(concat, v, right);
|
2022-05-27 19:51:18 -04:00
|
|
|
if (!v) {
|
|
|
|
destroy_tree(right);
|
2018-12-03 12:40:13 -05:00
|
|
|
return NULL;
|
2022-05-27 19:51:18 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
++*last;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return v;
|
2008-07-24 18:48:31 +00:00
|
|
|
}
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
/* Non-zero if bit `i` is set in the byte-array bitmap `b`.
 * Arguments are parenthesized so that expressions such as `x | 1` index
 * the intended bit. */
#define BITMAP_HASSET(b, i) ((b)[(i) >> 3] & (1 << ((i) & 7)))
|
2008-07-24 18:48:31 +00:00
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
/**
 * Walk from node `n` towards the root, appending literal characters to
 * `buf`, and hand the collected text to `cb` once no more static text can
 * be added.
 *
 * The callback is invoked when the root is reached, when an optional node
 * is met, or when a character class with more than 16 members is met.
 * Smaller classes are expanded: each member is appended in turn and the
 * walk is continued recursively from the class node's parent.
 *
 * @param n       node to start from
 * @param buf     accumulator for the text collected so far
 * @param prev    node the walk came from (NULL on the first step); used at
 *                a concat node to tell whether its left side is still to be
 *                visited
 * @param cb      callback receiving (cbdata, text, length, regex)
 * @param cbdata  opaque pointer forwarded to `cb`
 * @param regex   forwarded to `cb`
 * @return CL_SUCCESS, or CL_EMEM if the callback or a nested walk fails
 */
static cl_error_t build_suffixtree_ascend(struct node *n, struct text_buffer *buf, struct node *prev, suffix_callback cb, void *cbdata, struct regex_list *regex)
{
    size_t i, cnt;

    while (n) {
        struct node *q = n;

        switch (n->type) {
            case root:
                textbuffer_putc(buf, '\0');
                /* length excludes the terminator just written */
                if (cb(cbdata, buf->data, buf->pos - 1, regex) != CL_SUCCESS)
                    return CL_EMEM;
                return CL_SUCCESS;
            case leaf:
                textbuffer_putc(buf, n->u.leaf_char);
                n = n->parent;
                break;
            case leaf_class:
                cnt = 0;
                /* the bitmap holds 256 bits, one per byte value 0..255 */
                for (i = 0; i < 256; i++)
                    if (BITMAP_HASSET(n->u.leaf_class_bitmap, i))
                        cnt++;
                if (cnt > 16) {
                    /* too many members to expand: stop here */
                    textbuffer_putc(buf, '\0');
                    if (cb(cbdata, buf->data, buf->pos - 1, regex) != CL_SUCCESS)
                        return CL_EMEM;
                    return CL_SUCCESS;
                }
                /* handle small classes by expanding */
                for (i = 0; i < 256; i++) {
                    if (BITMAP_HASSET(n->u.leaf_class_bitmap, i)) {
                        size_t pos;

                        /* remember where this alternative starts so the
                         * next member overwrites it */
                        pos = buf->pos;
                        textbuffer_putc(buf, (char)i);
                        if (build_suffixtree_ascend(n->parent, buf, n, cb, cbdata, regex) != CL_SUCCESS)
                            return CL_EMEM;
                        buf->pos = pos;
                    }
                }
                return CL_SUCCESS;
            case concat:
                if (prev != n->u.children.left) {
                    if (build_suffixtree_descend(n->u.children.left, buf, cb, cbdata, regex) != CL_SUCCESS)
                        return CL_EMEM;
                    /* we're done here, descend will call
                     * ascend if needed */
                    return CL_SUCCESS;
                } else {
                    n = n->parent;
                }
                break;
            case alternate:
                n = n->parent;
                break;
            case optional:
                textbuffer_putc(buf, '\0');
                if (cb(cbdata, buf->data, buf->pos - 1, regex) != CL_SUCCESS)
                    return CL_EMEM;
                return CL_SUCCESS;
        }
        prev = q;
    }
    return CL_SUCCESS;
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
/**
 * Find where the expression rooted at `n` ends and start collecting the
 * suffix from there.
 *
 * Concat nodes are followed to their right child. What happens next depends
 * on the node reached:
 *   - alternate: both branches are processed, each restarting from the
 *     buffer position saved on entry;
 *   - optional: the text collected so far is reported through `cb`;
 *   - leaf / leaf_class: the upward walk is started from that node;
 *   - anything else (or no node at all): nothing to do.
 *
 * @param n       subtree to process, may be NULL
 * @param buf     accumulator for the text collected so far
 * @param cb      callback receiving (cbdata, text, length, regex)
 * @param cbdata  opaque pointer forwarded to `cb`
 * @param regex   forwarded to `cb`
 * @return CL_SUCCESS, or CL_EMEM if the callback or a nested call fails
 */
static cl_error_t build_suffixtree_descend(struct node *n, struct text_buffer *buf, suffix_callback cb, void *cbdata, struct regex_list *regex)
{
    size_t restart;

    for (; NULL != n && concat == n->type; n = n->u.children.right) {
        /* skip down the right spine */
    }
    if (NULL == n)
        return CL_SUCCESS;

    /* find out end of the regular expression,
     * if it ends with a static pattern */
    if (alternate == n->type) {
        /* save pos as restart point */
        restart = buf->pos;
        if (CL_SUCCESS != build_suffixtree_descend(n->u.children.left, buf, cb, cbdata, regex))
            return CL_EMEM;
        buf->pos = restart;
        if (CL_SUCCESS != build_suffixtree_descend(n->u.children.right, buf, cb, cbdata, regex))
            return CL_EMEM;
        buf->pos = restart;
        return CL_SUCCESS;
    }

    if (optional == n->type) {
        textbuffer_putc(buf, '\0');
        if (CL_SUCCESS != cb(cbdata, buf->data, buf->pos - 1, regex))
            return CL_EMEM;
        return CL_SUCCESS;
    }

    if (leaf == n->type || leaf_class == n->type) {
        if (CL_SUCCESS != build_suffixtree_ascend(n, buf, NULL, cb, cbdata, regex))
            return CL_EMEM;
        return CL_SUCCESS;
    }

    return CL_SUCCESS;
}
|
|
|
|
|
2019-02-27 00:47:38 -05:00
|
|
|
/**
 * Compile `pattern` into `preg` and report its static suffix(es) via `cb`.
 *
 * The pattern is first compiled with cli_regcomp() (REG_EXTENDED). If that
 * succeeds it is parsed into a tree, a zeroed root node is attached above
 * the tree, and build_suffixtree_descend() is run over it.
 *
 * @param pattern  NUL-terminated regular expression, must not be NULL
 * @param preg     storage for the compiled expression
 * @param cb       callback invoked by the tree walk with each collected text
 * @param cbdata   opaque pointer forwarded to `cb`
 * @return REG_INVARG if `pattern` is NULL, the cli_regcomp() error code if
 *         compilation fails, REG_ESPACE if copying or parsing the pattern
 *         fails, otherwise the result of build_suffixtree_descend()
 *
 * NOTE(review): on the failure paths after a successful cli_regcomp(),
 * `preg` is not released here — presumably the caller does that; confirm.
 */
cl_error_t cli_regex2suffix(const char *pattern, regex_t *preg, suffix_callback cb, void *cbdata)
{
    struct regex_list regex = {0};
    struct text_buffer buf  = {0};
    struct node root_node   = {0}; /* zeroed, so its type is `root` */
    struct node *n          = NULL;
    size_t last             = 0;
    int rc;

    if (NULL == pattern) {
        cli_errmsg("cli_regex2suffix: pattern can't be NULL\n");
        rc = REG_INVARG;
        goto done;
    }

    regex.preg = preg;
    rc         = cli_regcomp(regex.preg, pattern, REG_EXTENDED);
    if (rc) {
        /* first call with a NULL buffer only asks for the message length */
        size_t buflen = cli_regerror(rc, regex.preg, NULL, 0);
        char *errbuf  = cli_max_malloc(buflen);

        if (errbuf) {
            cli_regerror(rc, regex.preg, errbuf, buflen);
            cli_errmsg(MODULE "Error compiling regular expression %s: %s\n", pattern, errbuf);
            free(errbuf);
        } else {
            cli_errmsg(MODULE "Error compiling regular expression: %s\n", pattern);
        }
        return rc;
    }

    regex.nxt = NULL;
    CLI_SAFER_STRDUP_OR_GOTO_DONE(pattern, regex.pattern,
                                  cli_errmsg("cli_regex2suffix: unable to strdup regex.pattern\n");
                                  rc = REG_ESPACE);

    n = parse_regex((const uint8_t *)pattern, strlen(pattern), &last);
    if (!n) {
        rc = REG_ESPACE;
        goto done;
    }

    /* buf and root_node are still in their zero-initialized state here */
    n->parent = &root_node;

    rc = build_suffixtree_descend(n, &buf, cb, cbdata, &regex);

done:
    CLI_FREE_AND_SET_NULL(regex.pattern);
    CLI_FREE_AND_SET_NULL(buf.data);
    destroy_tree(n);
    return rc;
}
|