2004-07-19 17:54:40 +00:00
/*
2025-02-14 10:24:30 -05:00
* Copyright ( C ) 2013 - 2025 Cisco Systems , Inc . and / or its affiliates . All rights reserved .
2019-01-25 10:15:50 -05:00
* Copyright ( C ) 2007 - 2013 Sourcefire , Inc .
2008-02-06 12:26:16 +00:00
*
2008-04-02 15:24:51 +00:00
* Authors : Tomasz Kojm
2004-07-19 17:54:40 +00:00
*
* This program is free software ; you can redistribute it and / or modify
2007-03-31 20:31:04 +00:00
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
2004-07-19 17:54:40 +00:00
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
2006-04-09 19:59:28 +00:00
* Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston ,
* MA 02110 - 1301 , USA .
2004-07-19 17:54:40 +00:00
*/
# if HAVE_CONFIG_H
# include "clamav-config.h"
# endif
# include <stdio.h>
# include <string.h>
# include <stdlib.h>
2008-07-25 19:00:25 +00:00
# include <ctype.h>
2009-08-26 23:39:35 +02:00
# include <sys/stat.h>
2008-11-03 19:26:57 +00:00
# include <assert.h>
2015-05-14 12:23:56 -04:00
# ifdef HAVE_UNISTD_H
2004-07-19 17:54:40 +00:00
# include <unistd.h>
2006-10-09 15:23:50 +00:00
# endif
2004-07-19 17:54:40 +00:00
# include "clamav.h"
# include "others.h"
2004-09-14 01:33:32 +00:00
# include "matcher.h"
2004-07-19 17:54:40 +00:00
# include "matcher-ac.h"
# include "filetypes.h"
2007-04-28 18:40:59 +00:00
# include "str.h"
2008-07-03 11:19:21 +00:00
# include "readdb.h"
2008-12-29 17:55:30 +00:00
# include "default.h"
2010-02-10 11:39:47 +02:00
# include "filtering.h"
2004-07-19 17:54:40 +00:00
2008-10-17 17:00:13 +00:00
# include "mpool.h"
2018-12-03 12:37:58 -05:00
// clang-format off
2015-05-14 12:23:56 -04:00
/*
 * Kinds of "special" pattern element. Elsewhere in this file these values are
 * compared against the `type` field of struct cli_ac_special:
 *   - ALT_CHAR:      alternatives are held in `alt.byte` (`num` bytes)
 *   - ALT_STR_FIXED: alternatives are held in `alt.f_str[0..num-1]`, each
 *                    `len[0]` bytes long
 *   - ALT_STR:       alternatives are held in the linked list `alt.v_str`
 * NOTE(review): LINE_MARKER, BOUNDARY and WORD_MARKER are not used in the
 * part of the file reviewed here; confirm their handling in the matcher.
 */
# define AC_SPECIAL_ALT_CHAR 1
# define AC_SPECIAL_ALT_STR_FIXED 2
# define AC_SPECIAL_ALT_STR 3
# define AC_SPECIAL_LINE_MARKER 4
# define AC_SPECIAL_BOUNDARY 5
# define AC_SPECIAL_WORD_MARKER 6
/*
 * Single-bit flag values (each constant below sets exactly one distinct bit),
 * so they can be OR-ed together. For every marker kind there is a LEFT and a
 * RIGHT flag, each with a NEGATIVE counterpart.
 * NOTE(review): presumably these are stored in a pattern's `boundary` field;
 * verify against the signature parser.
 */
# define AC_BOUNDARY_LEFT 0x0001
# define AC_BOUNDARY_LEFT_NEGATIVE 0x0002
# define AC_BOUNDARY_RIGHT 0x0004
# define AC_BOUNDARY_RIGHT_NEGATIVE 0x0008
# define AC_LINE_MARKER_LEFT 0x0010
# define AC_LINE_MARKER_LEFT_NEGATIVE 0x0020
# define AC_LINE_MARKER_RIGHT 0x0040
# define AC_LINE_MARKER_RIGHT_NEGATIVE 0x0080
# define AC_WORD_MARKER_LEFT 0x0100
# define AC_WORD_MARKER_LEFT_NEGATIVE 0x0200
# define AC_WORD_MARKER_RIGHT 0x0400
# define AC_WORD_MARKER_RIGHT_NEGATIVE 0x0800
2009-09-17 22:49:45 +02:00
/*
 * Per-byte classification table, indexed by an unsigned byte value.
 * Only the bytes listed below carry a non-zero class; every other entry is
 * implicitly zero-initialized. The interpretation of the class values (1, 2,
 * 3) belongs to the code that consults this table.
 */
static char boundary[256] = {
    [0x0a] = 2, /* '\n' */
    [0x0d] = 2, /* '\r' */
    [0x20] = 3, /* ' '  */
    [0x22] = 2, /* '"'  */
    [0x27] = 2, /* '\'' */
    [0x2d] = 3, /* '-'  */
    [0x2e] = 1, /* '.'  */
    [0x2f] = 3, /* '/'  */
    [0x3c] = 1, /* '<'  */
    [0x3d] = 2, /* '='  */
    [0x3e] = 2, /* '>'  */
    [0x40] = 1, /* '@'  */
    [0x5f] = 3, /* '_'  */
};
2018-12-03 12:37:58 -05:00
// clang-format on
2015-02-10 09:23:51 -08:00
static inline int insert_list ( struct cli_matcher * root , struct cli_ac_patt * pattern , struct cli_ac_node * pt )
2004-07-19 17:54:40 +00:00
{
2020-07-31 16:04:38 -07:00
struct cli_ac_list * new ;
2015-02-10 09:23:51 -08:00
struct cli_ac_list * * newtable ;
2014-11-11 16:48:19 -05:00
2019-05-03 18:16:03 -04:00
new = ( struct cli_ac_list * ) MPOOL_CALLOC ( root - > mempool , 1 , sizeof ( struct cli_ac_list ) ) ;
2015-02-09 19:28:39 -08:00
if ( ! new ) {
2015-02-10 09:23:51 -08:00
cli_errmsg ( " cli_ac_addpatt: Can't allocate memory for list node \n " ) ;
2014-11-11 16:48:19 -05:00
return CL_EMEM ;
2007-04-28 18:40:59 +00:00
}
2019-11-08 14:05:08 -08:00
new - > me = pattern ;
new - > node = pt ;
2007-09-13 18:14:20 +00:00
2015-02-10 09:23:51 -08:00
root - > ac_lists + + ;
2019-05-03 18:16:03 -04:00
newtable = MPOOL_REALLOC ( root - > mempool , root - > ac_listtable , root - > ac_lists * sizeof ( struct cli_ac_list * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! newtable ) {
2015-02-10 09:23:51 -08:00
root - > ac_lists - - ;
cli_errmsg ( " cli_ac_addpatt: Can't realloc ac_listtable \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2015-02-10 09:23:51 -08:00
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
root - > ac_listtable = newtable ;
2015-02-10 09:23:51 -08:00
root - > ac_listtable [ root - > ac_lists - 1 ] = new ;
2019-11-08 14:05:08 -08:00
return CL_SUCCESS ;
}
2015-02-10 09:23:51 -08:00
2019-11-08 14:05:08 -08:00
/*
 * Three-way comparison step for comparator functions: makes the *enclosing
 * function* return -1 when uia < uib and +1 when uia > uib, and falls through
 * when the two compare equal.
 *
 * Both parameters are parenthesized so that arguments containing operators
 * of lower precedence than '<' / '>' (e.g. `x | 1`) are compared as a whole.
 * Each argument may be evaluated twice: do not pass expressions with side
 * effects.
 */
#define RETURN_RES_IF_NE(uia, uib)    \
    do {                              \
        if ((uia) < (uib)) return -1; \
        if ((uia) > (uib)) return +1; \
    } while (0)
2014-11-11 16:48:19 -05:00
2019-11-08 14:05:08 -08:00
static int patt_cmp_fn ( const struct cli_ac_patt * a , const struct cli_ac_patt * b )
{
unsigned int i ;
int res ;
RETURN_RES_IF_NE ( a - > length [ 0 ] , b - > length [ 0 ] ) ;
RETURN_RES_IF_NE ( a - > prefix_length [ 0 ] , b - > prefix_length [ 0 ] ) ;
RETURN_RES_IF_NE ( a - > ch [ 0 ] , b - > ch [ 0 ] ) ;
RETURN_RES_IF_NE ( a - > ch [ 1 ] , b - > ch [ 1 ] ) ;
RETURN_RES_IF_NE ( a - > boundary , b - > boundary ) ;
2022-06-01 16:46:36 -04:00
/*
* If the first two arguments to memcmp are NULL , clangs
* UndefinedBehaviorSanitizer will complain . It is legal if the length
* is zero , so don ' t call memcmp if the length is zero .
*/
if ( a - > length [ 0 ] > 0 ) {
res = memcmp ( a - > pattern , b - > pattern , a - > length [ 0 ] * sizeof ( uint16_t ) ) ;
if ( res ) {
return res ;
}
}
if ( a - > prefix_length [ 0 ] > 0 ) {
res = memcmp ( a - > prefix , b - > prefix , a - > prefix_length [ 0 ] * sizeof ( uint16_t ) ) ;
if ( res ) {
return res ;
}
}
2019-11-08 14:05:08 -08:00
RETURN_RES_IF_NE ( a - > special , b - > special ) ;
if ( ! a - > special & & ! b - > special )
return 0 ;
for ( i = 0 ; i < a - > special ; i + + ) {
struct cli_ac_special * spcl_a = a - > special_table [ i ] , * spcl_b = b - > special_table [ i ] ;
RETURN_RES_IF_NE ( spcl_a - > num , spcl_b - > num ) ;
RETURN_RES_IF_NE ( spcl_a - > negative , spcl_b - > negative ) ;
RETURN_RES_IF_NE ( spcl_a - > type , spcl_b - > type ) ;
if ( spcl_a - > type = = AC_SPECIAL_ALT_CHAR ) {
res = memcmp ( ( spcl_a - > alt ) . byte , ( spcl_b - > alt ) . byte , spcl_a - > num ) ;
if ( res ) return res ;
} else if ( spcl_a - > type = = AC_SPECIAL_ALT_STR_FIXED ) {
unsigned int j ;
RETURN_RES_IF_NE ( spcl_a - > len [ 0 ] , spcl_b - > len [ 0 ] ) ;
for ( j = 0 ; j < spcl_a - > num ; j + + ) {
res = memcmp ( ( spcl_a - > alt ) . f_str [ j ] , ( spcl_b - > alt ) . f_str [ j ] , spcl_a - > len [ 0 ] ) ;
if ( res ) return res ;
2014-11-11 16:48:19 -05:00
}
2019-11-08 14:05:08 -08:00
} else if ( spcl_a - > type = = AC_SPECIAL_ALT_STR ) {
struct cli_alt_node * alt_a = ( spcl_a - > alt ) . v_str , * alt_b = ( spcl_b - > alt ) . v_str ;
while ( alt_a & & alt_b ) {
RETURN_RES_IF_NE ( alt_a - > len , alt_b - > len ) ;
res = memcmp ( alt_a - > str , alt_b - > str , alt_a - > len ) ;
if ( res ) return res ;
alt_a = alt_a - > next ;
alt_b = alt_b - > next ;
}
RETURN_RES_IF_NE ( alt_a , alt_b ) ;
2014-11-11 16:48:19 -05:00
}
2019-11-08 14:05:08 -08:00
}
return 0 ;
}
static int sort_list_fn ( const void * a , const void * b )
{
const struct cli_ac_node * node_a = ( * ( const struct cli_ac_list * * ) a ) - > node ;
const struct cli_ac_node * node_b = ( * ( const struct cli_ac_list * * ) b ) - > node ;
const struct cli_ac_patt * patt_a = ( * ( const struct cli_ac_list * * ) a ) - > me ;
const struct cli_ac_patt * patt_b = ( * ( const struct cli_ac_list * * ) b ) - > me ;
int res ;
/* 1. Group by owning node
* ( this is for assigning entries to nodes ) */
RETURN_RES_IF_NE ( node_a , node_b ) ;
/* 2. Group together equal pattern in a node
* ( this is for building the next_same list ) */
res = patt_cmp_fn ( patt_a , patt_b ) ;
if ( res )
return res ;
/* 3. Sort equal patterns in a node by partno in ascending order
* ( this is required by the matcher ) */
RETURN_RES_IF_NE ( patt_a - > partno , patt_b - > partno ) ;
/* 4. Keep close patterns close
2024-01-19 09:08:36 -08:00
* ( this is for performance ) */
2019-11-08 14:05:08 -08:00
RETURN_RES_IF_NE ( patt_a , patt_b ) ;
return 0 ;
}
2014-11-11 16:48:19 -05:00
2019-11-08 14:05:08 -08:00
static int sort_heads_by_partno_fn ( const void * a , const void * b )
{
const struct cli_ac_list * list_a = * ( const struct cli_ac_list * * ) a ;
const struct cli_ac_list * list_b = * ( const struct cli_ac_list * * ) b ;
const struct cli_ac_patt * patt_a = list_a - > me ;
const struct cli_ac_patt * patt_b = list_b - > me ;
/* 1. Sort heads by partno
* ( this is required by the matcher ) */
RETURN_RES_IF_NE ( patt_a - > partno , patt_b - > partno ) ;
/* 2. Place longer lists earlier
* ( this is for performance ) */
while ( 1 ) {
if ( ! list_a - > next_same ) {
if ( ! list_b - > next_same )
break ;
return + 1 ;
}
if ( ! list_b - > next_same )
return - 1 ;
list_a = list_a - > next_same ;
list_b = list_b - > next_same ;
2007-09-13 18:14:20 +00:00
}
2019-11-08 14:05:08 -08:00
/* 3. Keep close patterns close
2024-01-19 09:08:36 -08:00
* ( this is for performance ) */
2019-11-08 14:05:08 -08:00
RETURN_RES_IF_NE ( patt_a , patt_b ) ;
return 0 ;
}
static inline void link_node_lists ( struct cli_ac_list * * listtable , unsigned int nentries )
{
struct cli_ac_list * prev = listtable [ 0 ] ;
struct cli_ac_node * node = prev - > node ;
unsigned int i , nheads = 1 ;
/* Link equal patterns in the next_same list (entries are already sorted by partno asc) */
for ( i = 1 ; i < nentries ; i + + ) {
int ret = patt_cmp_fn ( prev - > me , listtable [ i ] - > me ) ;
if ( ret ) {
/* This is a new head of a next_same chain */
prev = listtable [ i ] ;
if ( i ! = nheads ) {
/* Move heads towards the beginning of the table */
listtable [ i ] = listtable [ nheads ] ;
listtable [ nheads ] = prev ;
}
nheads + + ;
} else {
prev - > next_same = listtable [ i ] ;
prev - > next = NULL ;
prev = listtable [ i ] ;
}
2010-12-02 18:50:53 +01:00
}
2004-07-19 17:54:40 +00:00
2019-11-08 14:05:08 -08:00
cli_qsort ( listtable , nheads , sizeof ( listtable [ 0 ] ) , sort_heads_by_partno_fn ) ;
/* Link heads in the next list */
node - > list = listtable [ 0 ] ;
for ( i = 1 ; i < nheads ; i + + )
listtable [ i - 1 ] - > next = listtable [ i ] ;
listtable [ nheads - 1 ] - > next = NULL ;
}
static void link_lists ( struct cli_matcher * root )
{
struct cli_ac_node * curnode ;
unsigned int i , grouplen ;
if ( ! root - > ac_lists )
return ;
/* Group the list by owning node, pattern equality and sort by partno */
cli_qsort ( root - > ac_listtable , root - > ac_lists , sizeof ( root - > ac_listtable [ 0 ] ) , sort_list_fn ) ;
curnode = root - > ac_listtable [ 0 ] - > node ;
for ( i = 1 , grouplen = 1 ; i < = root - > ac_lists ; i + + , grouplen + + ) {
if ( i = = root - > ac_lists | | root - > ac_listtable [ i ] - > node ! = curnode ) {
link_node_lists ( & root - > ac_listtable [ i - grouplen ] , grouplen ) ;
if ( i < root - > ac_lists ) {
grouplen = 0 ;
curnode = root - > ac_listtable [ i ] - > node ;
}
}
}
2004-07-19 17:54:40 +00:00
}
2022-09-30 10:43:55 -07:00
/**
* @ brief Inserts newly malloced trans node in the array of nodes to be freed on
* cleanup . There is no verification that the added node is not already in the
* list , so that is up to the caller .
*
* @ param root The matcher root .
* @ param trans The trans node to be tracked .
* @ return bool
*/
2022-10-18 09:54:50 -07:00
static bool store_trans_node ( struct cli_matcher * root , struct cli_ac_node * * trans )
2022-09-30 10:43:55 -07:00
{
bool bRet = false ;
if ( root - > trans_cnt + 1 > root - > trans_capacity ) {
2022-10-18 09:54:50 -07:00
size_t newCapacity = root - > trans_capacity + 1024 ;
struct cli_ac_node * * * ret = MPOOL_REALLOC ( root - > mempool , root - > trans_array , newCapacity * sizeof ( struct cli_ac_node * * ) ) ;
2022-09-30 10:43:55 -07:00
if ( NULL = = ret ) {
cli_errmsg ( " cli_ac_addpatt: Can't allocate memory for cleanup storage of trans \n " ) ;
goto done ;
}
root - > trans_capacity = newCapacity ;
root - > trans_array = ret ;
}
root - > trans_array [ root - > trans_cnt + + ] = trans ;
bRet = true ;
done :
return bRet ;
}
/**
* @ brief Frees all trans nodes for cleanup .
* cleanup .
*
* @ param root The matcher root .
*/
static void free_trans_nodes ( struct cli_matcher * root )
{
uint32_t i = 0 ;
for ( i = 0 ; i < root - > trans_cnt ; i + + ) {
MPOOL_FREE ( root - > mempool , root - > trans_array [ i ] ) ;
}
MPOOL_FREE ( root - > mempool , root - > trans_array ) ;
root - > trans_array = NULL ;
root - > trans_cnt = 0 ;
root - > trans_capacity = 0 ;
}
2015-02-09 19:28:39 -08:00
static inline struct cli_ac_node * add_new_node ( struct cli_matcher * root , uint16_t i , uint16_t len )
{
struct cli_ac_node * new ;
struct cli_ac_node * * newtable ;
2019-05-03 18:16:03 -04:00
new = ( struct cli_ac_node * ) MPOOL_CALLOC ( root - > mempool , 1 , sizeof ( struct cli_ac_node ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! new ) {
2015-02-09 19:28:39 -08:00
cli_errmsg ( " cli_ac_addpatt: Can't allocate memory for AC node \n " ) ;
return NULL ;
}
2018-12-03 12:40:13 -05:00
if ( i ! = len - 1 ) {
2019-05-03 18:16:03 -04:00
new - > trans = ( struct cli_ac_node * * ) MPOOL_CALLOC ( root - > mempool , 256 , sizeof ( struct cli_ac_node * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! new - > trans ) {
2015-02-09 19:28:39 -08:00
cli_errmsg ( " cli_ac_addpatt: Can't allocate memory for new->trans \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2015-02-09 19:28:39 -08:00
return NULL ;
}
2022-09-30 10:43:55 -07:00
if ( ! store_trans_node ( root , new - > trans ) ) {
/* Error printed in store_trans_node */
MPOOL_FREE ( root - > mempool , new ) ;
return NULL ;
}
2015-02-09 19:28:39 -08:00
}
root - > ac_nodes + + ;
2019-05-03 18:16:03 -04:00
newtable = MPOOL_REALLOC ( root - > mempool , root - > ac_nodetable , root - > ac_nodes * sizeof ( struct cli_ac_node * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! newtable ) {
2015-02-09 19:28:39 -08:00
root - > ac_nodes - - ;
cli_errmsg ( " cli_ac_addpatt: Can't realloc ac_nodetable \n " ) ;
2018-12-03 12:40:13 -05:00
if ( new - > trans )
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new - > trans ) ;
MPOOL_FREE ( root - > mempool , new ) ;
2015-02-09 19:28:39 -08:00
return NULL ;
}
2018-12-03 12:40:13 -05:00
root - > ac_nodetable = newtable ;
2015-02-09 19:28:39 -08:00
root - > ac_nodetable [ root - > ac_nodes - 1 ] = new ;
return new ;
}
static int cli_ac_addpatt_recursive ( struct cli_matcher * root , struct cli_ac_patt * pattern , struct cli_ac_node * pt , uint16_t i , uint16_t len )
{
struct cli_ac_node * next ;
int ret ;
/* last node, insert pattern here (base case)*/
2018-12-03 12:40:13 -05:00
if ( i > = len ) {
2015-02-10 09:23:51 -08:00
return insert_list ( root , pattern , pt ) ;
2015-02-09 19:28:39 -08:00
}
/* if current node has no trans table, generate one */
2018-12-03 12:40:13 -05:00
if ( ! pt - > trans ) {
2019-05-03 18:16:03 -04:00
pt - > trans = ( struct cli_ac_node * * ) MPOOL_CALLOC ( root - > mempool , 256 , sizeof ( struct cli_ac_node * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! pt - > trans ) {
2015-02-09 19:28:39 -08:00
cli_errmsg ( " cli_ac_addpatt: Can't allocate memory for pt->trans \n " ) ;
return CL_EMEM ;
}
2022-09-30 10:43:55 -07:00
if ( ! store_trans_node ( root , pt - > trans ) ) {
/* Error printed in store_trans_node */
return CL_EMEM ;
}
2015-02-09 19:28:39 -08:00
}
/* if pattern is nocase, we need to enumerate all the combinations if applicable
* it ' s why this function was re - written to be recursive
*/
2021-08-31 16:55:39 +02:00
if ( ( pattern - > sigopts & ACPATT_OPTION_NOCASE ) & & ( pattern - > pattern [ i ] & 0xff ) < 0x80 & & isalpha ( ( unsigned char ) ( pattern - > pattern [ i ] & 0xff ) ) ) {
2019-08-22 16:51:01 -04:00
next = pt - > trans [ CLI_NOCASEI ( ( unsigned char ) ( pattern - > pattern [ i ] & 0xff ) ) ] ;
2018-12-03 12:40:13 -05:00
if ( ! next )
2015-02-09 19:28:39 -08:00
next = add_new_node ( root , i , len ) ;
2018-12-03 12:40:13 -05:00
if ( ! next )
2015-02-09 19:28:39 -08:00
return CL_EMEM ;
else
2019-08-22 16:51:01 -04:00
pt - > trans [ CLI_NOCASEI ( ( unsigned char ) ( pattern - > pattern [ i ] & 0xff ) ) ] = next ;
2015-02-09 19:28:39 -08:00
2018-12-03 12:40:13 -05:00
if ( ( ret = cli_ac_addpatt_recursive ( root , pattern , next , i + 1 , len ) ) ! = CL_SUCCESS )
2015-02-09 19:28:39 -08:00
return ret ;
}
/* normal transition, also enumerates the 'normal' nocase */
2018-12-03 12:40:13 -05:00
next = pt - > trans [ ( unsigned char ) ( pattern - > pattern [ i ] & 0xff ) ] ;
if ( ! next )
2015-02-09 19:28:39 -08:00
next = add_new_node ( root , i , len ) ;
2018-12-03 12:40:13 -05:00
if ( ! next )
2015-02-09 19:28:39 -08:00
return CL_EMEM ;
else
2018-12-03 12:40:13 -05:00
pt - > trans [ ( unsigned char ) ( pattern - > pattern [ i ] & 0xff ) ] = next ;
2015-02-09 19:28:39 -08:00
2018-12-03 12:40:13 -05:00
return cli_ac_addpatt_recursive ( root , pattern , next , i + 1 , len ) ;
2015-02-09 19:28:39 -08:00
}
2019-02-27 00:47:38 -05:00
cl_error_t cli_ac_addpatt ( struct cli_matcher * root , struct cli_ac_patt * pattern )
2015-02-09 19:28:39 -08:00
{
struct cli_ac_patt * * newtable ;
2015-07-02 14:41:37 -04:00
uint16_t len = MIN ( root - > ac_maxdepth , pattern - > length [ 0 ] ) ;
2018-11-16 11:50:48 -08:00
uint16_t i ;
2015-02-09 19:28:39 -08:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < len ; i + + ) {
if ( pattern - > pattern [ i ] & CLI_MATCH_WILDCARD ) {
2015-02-09 19:28:39 -08:00
len = i ;
break ;
}
}
2018-12-03 12:40:13 -05:00
if ( len < root - > ac_mindepth ) {
2015-02-09 19:28:39 -08:00
/* cli_errmsg("cli_ac_addpatt: Signature for %s is too short\n", pattern->virname); */
return CL_EMALFDB ;
}
/* pattern added to master list */
root - > ac_patterns + + ;
2019-05-03 18:16:03 -04:00
newtable = MPOOL_REALLOC ( root - > mempool , root - > ac_pattable , root - > ac_patterns * sizeof ( struct cli_ac_patt * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! newtable ) {
2015-02-09 19:28:39 -08:00
root - > ac_patterns - - ;
cli_errmsg ( " cli_ac_addpatt: Can't realloc ac_pattable \n " ) ;
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
root - > ac_pattable = newtable ;
2015-02-09 19:28:39 -08:00
root - > ac_pattable [ root - > ac_patterns - 1 ] = pattern ;
pattern - > depth = len ;
return cli_ac_addpatt_recursive ( root , pattern , root - > ac_root , 0 , len ) ;
}
2007-04-28 18:40:59 +00:00
/* One entry of the singly linked FIFO queue used while walking the trie
 * breadth-first (see bfs_enqueue / bfs_dequeue). */
struct bfs_list {
/* trie node carried by this queue entry */
struct cli_ac_node * node ;
/* next entry in the queue, NULL for the last one */
struct bfs_list * next ;
} ;
2007-10-03 00:31:52 +00:00
static int bfs_enqueue ( struct bfs_list * * bfs , struct bfs_list * * last , struct cli_ac_node * n )
2004-07-19 17:54:40 +00:00
{
2014-11-11 16:48:19 -05:00
struct bfs_list * new ;
2004-07-19 17:54:40 +00:00
2022-05-08 14:59:09 -07:00
new = ( struct bfs_list * ) malloc ( sizeof ( struct bfs_list ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! new ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " bfs_enqueue: Can't allocate memory for bfs_list \n " ) ;
return CL_EMEM ;
2004-07-19 17:54:40 +00:00
}
2014-11-11 16:48:19 -05:00
2007-10-03 00:31:52 +00:00
new - > next = NULL ;
2004-07-19 17:54:40 +00:00
new - > node = n ;
2007-10-03 00:31:52 +00:00
2018-12-03 12:40:13 -05:00
if ( * last ) {
2014-11-11 16:48:19 -05:00
( * last ) - > next = new ;
2018-12-03 12:40:13 -05:00
* last = new ;
2007-10-03 00:31:52 +00:00
} else {
2014-11-11 16:48:19 -05:00
* bfs = * last = new ;
2007-10-03 00:31:52 +00:00
}
2007-04-28 18:40:59 +00:00
2006-05-18 11:29:24 +00:00
return CL_SUCCESS ;
2004-07-19 17:54:40 +00:00
}
2007-12-06 15:24:03 +00:00
static struct cli_ac_node * bfs_dequeue ( struct bfs_list * * bfs , struct bfs_list * * last )
2004-07-19 17:54:40 +00:00
{
2014-11-11 16:48:19 -05:00
struct bfs_list * lpt ;
struct cli_ac_node * pt ;
2004-07-19 17:54:40 +00:00
2018-12-03 12:40:13 -05:00
if ( ! ( lpt = * bfs ) ) {
2014-11-11 16:48:19 -05:00
return NULL ;
2004-07-19 17:54:40 +00:00
} else {
2014-11-11 16:48:19 -05:00
* bfs = ( * bfs ) - > next ;
2018-12-03 12:40:13 -05:00
pt = lpt - > node ;
2014-11-11 16:48:19 -05:00
2018-12-03 12:40:13 -05:00
if ( lpt = = * last )
2014-11-11 16:48:19 -05:00
* last = NULL ;
free ( lpt ) ;
return pt ;
2004-07-19 17:54:40 +00:00
}
}
2007-04-28 18:40:59 +00:00
static int ac_maketrans ( struct cli_matcher * root )
2004-07-19 17:54:40 +00:00
{
2014-11-11 16:48:19 -05:00
struct bfs_list * bfs = NULL , * bfs_last = NULL ;
struct cli_ac_node * ac_root = root - > ac_root , * child , * node , * fail ;
int i , ret ;
2004-07-19 17:54:40 +00:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 256 ; i + + ) {
2014-11-11 16:48:19 -05:00
node = ac_root - > trans [ i ] ;
2018-12-03 12:40:13 -05:00
if ( ! node ) {
2014-11-11 16:48:19 -05:00
ac_root - > trans [ i ] = ac_root ;
} else {
node - > fail = ac_root ;
2018-12-03 12:40:13 -05:00
if ( ( ret = bfs_enqueue ( & bfs , & bfs_last , node ) ) )
2014-11-11 16:48:19 -05:00
return ret ;
}
2004-07-19 17:54:40 +00:00
}
2018-12-03 12:40:13 -05:00
while ( ( node = bfs_dequeue ( & bfs , & bfs_last ) ) ) {
if ( IS_LEAF ( node ) ) {
2014-11-11 16:48:19 -05:00
struct cli_ac_node * failtarget = node - > fail ;
while ( NULL ! = failtarget & & ( IS_LEAF ( failtarget ) | | ! IS_FINAL ( failtarget ) ) )
failtarget = failtarget - > fail ;
2014-01-02 17:19:05 -05:00
if ( NULL ! = failtarget )
node - > fail = failtarget ;
2014-11-11 16:48:19 -05:00
continue ;
}
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 256 ; i + + ) {
2014-11-11 16:48:19 -05:00
child = node - > trans [ i ] ;
2018-12-03 12:40:13 -05:00
if ( child ) {
2014-11-11 16:48:19 -05:00
fail = node - > fail ;
2018-12-03 12:40:13 -05:00
while ( IS_LEAF ( fail ) | | ! fail - > trans [ i ] )
2014-11-11 16:48:19 -05:00
fail = fail - > fail ;
child - > fail = fail - > trans [ i ] ;
2018-12-03 12:40:13 -05:00
if ( ( ret = bfs_enqueue ( & bfs , & bfs_last , child ) ) ! = 0 )
2014-11-11 16:48:19 -05:00
return ret ;
}
}
2004-07-19 17:54:40 +00:00
}
2007-04-28 18:40:59 +00:00
2008-11-04 19:23:35 +00:00
bfs = bfs_last = NULL ;
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 256 ; i + + ) {
2014-11-11 16:48:19 -05:00
node = ac_root - > trans [ i ] ;
2018-12-03 12:40:13 -05:00
if ( node ! = ac_root ) {
if ( ( ret = bfs_enqueue ( & bfs , & bfs_last , node ) ) )
2015-05-14 12:23:56 -04:00
return ret ;
2014-11-11 16:48:19 -05:00
}
2008-11-04 19:23:35 +00:00
}
2014-11-11 16:48:19 -05:00
2018-12-03 12:40:13 -05:00
while ( ( node = bfs_dequeue ( & bfs , & bfs_last ) ) ) {
if ( IS_LEAF ( node ) )
2014-11-11 16:48:19 -05:00
continue ;
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 256 ; i + + ) {
2014-11-11 16:48:19 -05:00
child = node - > trans [ i ] ;
if ( ! child | | ( ! IS_FINAL ( child ) & & IS_LEAF ( child ) ) ) {
struct cli_ac_node * failtarget = node - > fail ;
2018-12-03 12:40:13 -05:00
while ( IS_LEAF ( failtarget ) | | ! failtarget - > trans [ i ] )
2014-11-11 16:48:19 -05:00
failtarget = failtarget - > fail ;
2018-12-03 12:40:13 -05:00
failtarget = failtarget - > trans [ i ] ;
2014-11-11 16:48:19 -05:00
node - > trans [ i ] = failtarget ;
} else if ( IS_FINAL ( child ) & & IS_LEAF ( child ) ) {
2015-02-10 09:23:51 -08:00
struct cli_ac_list * list ;
2014-11-11 16:48:19 -05:00
list = child - > list ;
if ( list ) {
while ( list - > next )
list = list - > next ;
list - > next = child - > fail - > list ;
} else {
child - > list = child - > fail - > list ;
}
child - > trans = child - > fail - > trans ;
} else {
2018-12-03 12:40:13 -05:00
if ( ( ret = bfs_enqueue ( & bfs , & bfs_last , child ) ) ! = 0 )
2014-11-11 16:48:19 -05:00
return ret ;
}
}
2008-11-04 19:23:35 +00:00
}
2006-05-18 11:29:24 +00:00
return CL_SUCCESS ;
2004-07-19 17:54:40 +00:00
}
2019-02-27 00:47:38 -05:00
cl_error_t cli_ac_buildtrie ( struct cli_matcher * root )
2004-07-19 17:54:40 +00:00
{
2018-12-03 12:40:13 -05:00
if ( ! root )
2014-11-11 16:48:19 -05:00
return CL_EMALFDB ;
2004-07-19 17:54:40 +00:00
2018-12-03 12:40:13 -05:00
if ( ! ( root - > ac_root ) ) {
2014-11-11 16:48:19 -05:00
cli_dbgmsg ( " cli_ac_buildtrie: AC pattern matcher is not initialised \n " ) ;
return CL_SUCCESS ;
2004-07-19 17:54:40 +00:00
}
2010-02-10 16:58:13 +02:00
if ( root - > filter )
2014-11-11 16:48:19 -05:00
cli_dbgmsg ( " Using filter for trie %d \n " , root - > type ) ;
2019-11-08 14:05:08 -08:00
link_lists ( root ) ;
2007-04-28 18:40:59 +00:00
return ac_maketrans ( root ) ;
2004-07-19 17:54:40 +00:00
}
2019-02-27 00:47:38 -05:00
cl_error_t cli_ac_init ( struct cli_matcher * root , uint8_t mindepth , uint8_t maxdepth , uint8_t dconf_prefiltering )
2004-07-19 17:54:40 +00:00
{
2008-11-03 19:26:57 +00:00
# ifdef USE_MPOOL
assert ( root - > mempool & & " mempool must be initialized " ) ;
# endif
2004-07-19 17:54:40 +00:00
2019-05-03 18:16:03 -04:00
root - > ac_root = ( struct cli_ac_node * ) MPOOL_CALLOC ( root - > mempool , 1 , sizeof ( struct cli_ac_node ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! root - > ac_root ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_init: Can't allocate memory for ac_root \n " ) ;
return CL_EMEM ;
2007-04-28 18:40:59 +00:00
}
2004-07-19 17:54:40 +00:00
2019-05-03 18:16:03 -04:00
root - > ac_root - > trans = ( struct cli_ac_node * * ) MPOOL_CALLOC ( root - > mempool , 256 , sizeof ( struct cli_ac_node * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! root - > ac_root - > trans ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_init: Can't allocate memory for ac_root->trans \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_root ) ;
2014-11-11 16:48:19 -05:00
return CL_EMEM ;
2004-07-19 17:54:40 +00:00
}
2007-04-28 18:40:59 +00:00
root - > ac_mindepth = mindepth ;
root - > ac_maxdepth = maxdepth ;
2010-02-15 15:01:37 +02:00
if ( cli_mtargets [ root - > type ] . enable_prefiltering & & dconf_prefiltering ) {
2019-05-03 18:16:03 -04:00
root - > filter = MPOOL_MALLOC ( root - > mempool , sizeof ( * root - > filter ) ) ;
2014-11-11 16:48:19 -05:00
if ( ! root - > filter ) {
cli_errmsg ( " cli_ac_init: Can't allocate memory for ac_root->filter \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_root - > trans ) ;
MPOOL_FREE ( root - > mempool , root - > ac_root ) ;
2014-11-11 16:48:19 -05:00
return CL_EMEM ;
}
filter_init ( root - > filter ) ;
2010-02-10 11:39:47 +02:00
}
2007-04-28 18:40:59 +00:00
return CL_SUCCESS ;
2004-07-19 17:54:40 +00:00
}
2008-10-17 17:00:13 +00:00
# ifdef USE_MPOOL
2009-09-11 16:05:50 +02:00
# define mpool_ac_free_special(a, b) ac_free_special(a, b)
static void ac_free_special ( mpool_t * mempool , struct cli_ac_patt * p )
2008-10-17 17:00:13 +00:00
# else
2009-09-11 16:05:50 +02:00
# define mpool_ac_free_special(a, b) ac_free_special(b)
static void ac_free_special ( struct cli_ac_patt * p )
2008-10-17 17:00:13 +00:00
# endif
2007-10-03 00:31:52 +00:00
{
2015-05-13 17:48:22 -04:00
unsigned int i , j ;
struct cli_ac_special * a1 ;
struct cli_alt_node * b1 , * b2 ;
2007-10-03 00:31:52 +00:00
2018-12-03 12:40:13 -05:00
if ( ! p - > special )
2014-11-11 16:48:19 -05:00
return ;
2007-10-03 00:31:52 +00:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < p - > special ; i + + ) {
2014-11-11 16:48:19 -05:00
a1 = p - > special_table [ i ] ;
2015-05-14 12:23:56 -04:00
if ( a1 - > type = = AC_SPECIAL_ALT_CHAR ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( mempool , ( a1 - > alt ) . byte ) ;
2015-05-14 12:23:56 -04:00
} else if ( a1 - > type = = AC_SPECIAL_ALT_STR_FIXED ) {
for ( j = 0 ; j < a1 - > num ; j + + )
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( mempool , ( a1 - > alt ) . f_str [ j ] ) ;
MPOOL_FREE ( mempool , ( a1 - > alt ) . f_str ) ;
2015-05-14 12:23:56 -04:00
} else if ( a1 - > type = = AC_SPECIAL_ALT_STR ) {
b1 = ( a1 - > alt ) . v_str ;
while ( b1 ) {
b2 = b1 - > next ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( mempool , b1 - > str ) ;
MPOOL_FREE ( mempool , b1 ) ;
2015-05-14 12:23:56 -04:00
b1 = b2 ;
}
}
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( mempool , a1 ) ;
2007-10-03 00:31:52 +00:00
}
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( mempool , p - > special_table ) ;
2007-10-03 00:31:52 +00:00
}
2005-09-23 02:23:36 +00:00
void cli_ac_free ( struct cli_matcher * root )
2004-07-19 17:54:40 +00:00
{
2022-05-16 21:29:25 -04:00
uint32_t i = 0 ;
struct cli_ac_patt * patt = NULL ;
2004-07-19 17:54:40 +00:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < root - > ac_patterns ; i + + ) {
2014-11-11 16:48:19 -05:00
patt = root - > ac_pattable [ i ] ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , patt - > prefix ? patt - > prefix : patt - > pattern ) ;
2021-04-21 16:24:24 -07:00
if ( ! ( patt - > lsigid [ 0 ] = = 1 ) ) {
/* Don't free the virname for patterns lsigs (normal or yara).
For lsigs , we store the virname in lsig - > virname , not in each ac - pattern .
TODO : never store the virname in the ac pattern and only store it per - signature , not per - pattern . */
MPOOL_FREE ( root - > mempool , patt - > virname ) ;
}
2022-05-16 21:29:25 -04:00
if ( patt - > special ) {
2014-11-11 16:48:19 -05:00
mpool_ac_free_special ( root - > mempool , patt ) ;
2022-05-16 21:29:25 -04:00
}
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , patt ) ;
2007-04-28 18:40:59 +00:00
}
2014-11-11 16:48:19 -05:00
2022-05-16 21:29:25 -04:00
if ( root - > ac_pattable ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_pattable ) ;
2022-05-16 21:29:25 -04:00
}
2007-04-28 18:40:59 +00:00
2022-05-16 21:29:25 -04:00
if ( root - > ac_reloff ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_reloff ) ;
2022-05-16 21:29:25 -04:00
}
2009-08-14 14:38:13 +02:00
2022-05-16 21:29:25 -04:00
for ( i = 0 ; i < root - > ac_lists ; i + + ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_listtable [ i ] ) ;
2022-05-16 21:29:25 -04:00
}
2015-02-10 09:23:51 -08:00
2022-05-16 21:29:25 -04:00
if ( root - > ac_listtable ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_listtable ) ;
2022-05-16 21:29:25 -04:00
}
2015-02-10 07:08:15 -08:00
2022-05-16 21:29:25 -04:00
for ( i = 0 ; i < root - > ac_nodes ; i + + ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_nodetable [ i ] ) ;
2022-05-16 21:29:25 -04:00
}
2004-07-19 17:54:40 +00:00
2022-05-16 21:29:25 -04:00
if ( root - > ac_nodetable ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_nodetable ) ;
2022-05-16 21:29:25 -04:00
}
2014-11-11 16:48:19 -05:00
2018-12-03 12:40:13 -05:00
if ( root - > ac_root ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > ac_root - > trans ) ;
MPOOL_FREE ( root - > mempool , root - > ac_root ) ;
2007-04-28 18:40:59 +00:00
}
2014-11-11 16:48:19 -05:00
2022-05-16 21:29:25 -04:00
if ( root - > filter ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > filter ) ;
2022-05-16 21:29:25 -04:00
}
2022-09-30 10:43:55 -07:00
free_trans_nodes ( root ) ;
2004-07-19 17:54:40 +00:00
}
2008-07-25 19:00:25 +00:00
/*
 * Recursively parse and (optionally) evaluate a logical signature expression.
 *
 * The expression is the character range [expr, end). It is built from decimal
 * subsignature ids, the binary operators '&' and '|', parentheses, and the
 * count modifiers '=', '<' and '>' followed by a number (a parenthesised block
 * may use the "N,M" form, where M is a minimum number of distinct ids).
 * sscanf() is applied directly to the text, so the underlying buffer must be
 * NUL-terminated at or after `end`.
 *
 * expr, end   Bounds of the (sub)expression to process.
 * lsigcnt     Per-subsignature match counters, indexed by subsignature id.
 *             Only read when parse_only is 0.
 * cnt         Accumulates the match count of the satisfied (sub)expressions.
 * ids         Accumulates a bitmask of the subsignature ids that contributed.
 *             Not updated on the block-modifier path (only *cnt is).
 * parse_only  Non-zero: validate the syntax only and return the largest
 *             subsignature id referenced, or -1 on error.
 *             Zero: evaluate and return 1 (true), 0 (false) or -1 (error).
 */
int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigned int *cnt, uint64_t *ids, unsigned int parse_only)
{
    unsigned int i, len = end - expr, pth = 0, opoff = 0, op1off = 0, val;
    unsigned int blkend = 0, id, modval1, modval2 = 0, lcnt = 0, rcnt = 0, tcnt, modoff = 0;
    uint64_t lids = 0, rids = 0, tids;
    int ret, lval, rval;
    char op = 0, op1 = 0, mod = 0, blkmod = 0;
    const char *lstart = expr, *lend = NULL, *rstart = NULL, *rend = end, *pt;

    /*
     * Scan for the operator to split on: `op` is the first operator found at
     * parenthesis depth 0, `op1` the last one seen at depth 1 (used when the
     * whole expression is a single parenthesised block).
     */
    for (i = 0; i < len; i++) {
        switch (expr[i]) {
            case '(':
                pth++;
                break;

            case ')':
                if (!pth) {
                    cli_errmsg("cli_ac_chklsig: Syntax error: Missing opening parenthesis\n");
                    return -1;
                }
                pth--;
                /* fall-through */

            case '>':
            case '<':
            case '=':
                mod    = expr[i];
                modoff = i;
                break;

            default:
                if (strchr("&|", expr[i])) {
                    if (!pth) {
                        op    = expr[i];
                        opoff = i;
                    } else if (pth == 1) {
                        op1    = expr[i];
                        op1off = i;
                    }
                }
        }

        if (op)
            break;

        if (op1 && !pth) {
            /* A depth-1 block has just been closed; look for a trailing modifier. */
            blkend = i;
            if (expr[i + 1] == '>' || expr[i + 1] == '<' || expr[i + 1] == '=') {
                blkmod = expr[i + 1];

                ret = sscanf(&expr[i + 2], "%u,%u", &modval1, &modval2);
                if (ret != 2)
                    ret = sscanf(&expr[i + 2], "%u", &modval1);

                if (!ret || ret == EOF) {
                    cli_errmsg("chklexpr: Syntax error: Missing number after '%c'\n", expr[i + 1]);
                    return -1;
                }

                /*
                 * Skip over the modifier's digits and comma. The cast keeps
                 * isdigit() defined for bytes that are negative as plain char.
                 */
                for (i += 2; i + 1 < len && (isdigit((unsigned char)expr[i + 1]) || expr[i + 1] == ','); i++) {
                    continue;
                }
            }

            /* The modifier only applies if the block spans the whole expression. */
            if (&expr[i + 1] == rend)
                break;
            else
                blkmod = 0;
        }
    }

    if (pth) {
        cli_errmsg("cli_ac_chklsig: Syntax error: Missing closing parenthesis\n");
        return -1;
    }

    /* No operator at all: either "(...)" to unwrap, or a single subsignature id. */
    if (!op && !op1) {
        if (expr[0] == '(')
            return cli_ac_chklsig(++expr, --end, lsigcnt, cnt, ids, parse_only);

        ret = sscanf(expr, "%u", &id);
        if (!ret || ret == EOF) {
            cli_errmsg("cli_ac_chklsig: Can't parse %s\n", expr);
            return -1;
        }

        if (parse_only) {
            val = id;
        } else {
            /*
             * The id is used as a bit index into the 64-bit *ids mask below;
             * shifting by 64 or more is undefined, so reject it before it is
             * used to index lsigcnt[] as well.
             */
            if (id >= 64) {
                cli_errmsg("cli_ac_chklsig: Subsignature id %u out of range\n", id);
                return -1;
            }
            val = lsigcnt[id];
        }

        if (mod) {
            pt  = expr + modoff + 1;
            ret = sscanf(pt, "%u", &modval1);
            if (!ret || ret == EOF) {
                cli_errmsg("chklexpr: Syntax error: Missing number after '%c'\n", mod);
                return -1;
            }

            if (!parse_only) {
                switch (mod) {
                    case '=':
                        if (val != modval1)
                            return 0;
                        break;
                    case '<':
                        if (val >= modval1)
                            return 0;
                        break;
                    case '>':
                        if (val <= modval1)
                            return 0;
                        break;
                    default:
                        return 0;
                }

                *cnt += val;
                *ids |= (uint64_t)1 << id;
                return 1;
            }
        }

        if (parse_only) {
            return val;
        } else {
            if (val) {
                *cnt += val;
                *ids |= (uint64_t)1 << id;
                return 1;
            } else {
                return 0;
            }
        }
    }

    /* Only a depth-1 operator exists: split inside the enclosing parentheses. */
    if (!op) {
        op    = op1;
        opoff = op1off;
        lstart++;
        rend = &expr[blkend];
    }

    if (!opoff) {
        cli_errmsg("cli_ac_chklsig: Syntax error: Missing left argument\n");
        return -1;
    }

    lend = &expr[opoff];
    if (opoff + 1 == len) {
        cli_errmsg("cli_ac_chklsig: Syntax error: Missing right argument\n");
        return -1;
    }

    rstart = &expr[opoff + 1];

    lval = cli_ac_chklsig(lstart, lend, lsigcnt, &lcnt, &lids, parse_only);
    if (lval == -1) {
        cli_errmsg("cli_ac_chklsig: Calculation of lval failed\n");
        return -1;
    }

    rval = cli_ac_chklsig(rstart, rend, lsigcnt, &rcnt, &rids, parse_only);
    if (rval == -1) {
        cli_errmsg("cli_ac_chklsig: Calculation of rval failed\n");
        return -1;
    }

    if (parse_only) {
        switch (op) {
            case '&':
            case '|':
                return MAX(lval, rval);
            default:
                cli_errmsg("cli_ac_chklsig: Incorrect operator type\n");
                return -1;
        }
    } else {
        switch (op) {
            case '&':
                ret = lval && rval;
                break;
            case '|':
                ret = lval || rval;
                break;
            default:
                cli_errmsg("cli_ac_chklsig: Incorrect operator type\n");
                return -1;
        }

        if (!blkmod) {
            if (ret) {
                *cnt += lcnt + rcnt;
                *ids |= lids | rids;
            }
            return ret;
        } else {
            /* Apply the block modifier to the combined count of both operands. */
            if (ret) {
                tcnt = lcnt + rcnt;
                tids = lids | rids;
            } else {
                tcnt = 0;
                tids = 0;
            }

            switch (blkmod) {
                case '=':
                    if (tcnt != modval1)
                        return 0;
                    break;
                case '<':
                    if (tcnt >= modval1)
                        return 0;
                    break;
                case '>':
                    if (tcnt <= modval1)
                        return 0;
                    break;
                default:
                    return 0;
            }

            if (modval2) {
                /* Count the distinct subsignature ids that matched (popcount of tids). */
                val = 0;
                while (tids) {
                    val += tids & (uint64_t)1;
                    tids >>= 1;
                }
                if (val < modval2)
                    return 0;
            }

            *cnt += tcnt;
            return 1;
        }
    }
}
2015-07-06 19:05:36 -04:00
/*
 * Forward declarations for the mutually recursive match helpers.
 * Parameter names follow the definitions: for the two branch functions the
 * fourth argument is the file offset and the fifth is the buffer length.
 */
inline static int ac_findmatch_special(const unsigned char *buffer, uint32_t offset, uint32_t bp, uint32_t fileoffset, uint32_t length,
                                       const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end, int rev);
static int ac_backward_match_branch(const unsigned char *buffer, uint32_t bp, uint32_t offset, uint32_t fileoffset, uint32_t length,
                                    const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end);
static int ac_forward_match_branch(const unsigned char *buffer, uint32_t bp, uint32_t offset, uint32_t fileoffset, uint32_t length,
                                   const struct cli_ac_patt *pattern, uint32_t pp, uint16_t specialcnt, uint32_t *start, uint32_t *end);
2015-05-06 13:34:15 -04:00
2015-05-18 09:59:04 -04:00
/*
 * call only by ac_findmatch_special! Does not handle recursive specials
 *
 * Compares one pattern element `p` against one data byte `b`. The expansion
 * relies on two locals of the enclosing function: `wc` (receives the metadata
 * bits of `p`) and `match` (cleared to 0 on mismatch, otherwise left as is).
 * Both arguments are parenthesised and may be evaluated more than once, so
 * they must be free of side effects.
 */
#define AC_MATCH_CHAR2(p, b)                                                \
    switch (wc = (p) & CLI_MATCH_METADATA) {                                \
        case CLI_MATCH_CHAR:                                                \
            if ((unsigned char)(p) != (b))                                  \
                match = 0;                                                  \
            break;                                                          \
                                                                            \
        case CLI_MATCH_NOCASE:                                              \
            if ((unsigned char)((p) & 0xff) != CLI_NOCASE(b))               \
                match = 0;                                                  \
            break;                                                          \
                                                                            \
        case CLI_MATCH_IGNORE:                                              \
            break;                                                          \
                                                                            \
        case CLI_MATCH_NIBBLE_HIGH:                                         \
            if ((unsigned char)((p) & 0x00f0) != ((b) & 0xf0))              \
                match = 0;                                                  \
            break;                                                          \
                                                                            \
        case CLI_MATCH_NIBBLE_LOW:                                          \
            if ((unsigned char)((p) & 0x000f) != ((b) & 0x0f))              \
                match = 0;                                                  \
            break;                                                          \
                                                                            \
        default:                                                            \
            cli_errmsg("ac_findmatch: Unknown metatype 0x%x\n", wc);        \
            match = 0;                                                      \
    }
2015-07-06 19:05:36 -04:00
/*
 * call only by ac_XX_match_branch!
 *
 * Compares one pattern element `p` against one data byte `b`; `rev` is
 * non-zero when walking backwards through the prefix. The expansion uses these
 * names from the enclosing function: wc, match, buffer, offset, bp,
 * fileoffset, length, pattern, i, specialcnt, start and end. For a special
 * element it delegates to ac_findmatch_special() and may `return` from the
 * enclosing function. Arguments are parenthesised and may be evaluated more
 * than once, so they must be free of side effects.
 */
#define AC_MATCH_CHAR(p, b, rev)                                                                \
    switch (wc = (p) & CLI_MATCH_METADATA) {                                                    \
        case CLI_MATCH_CHAR:                                                                    \
            if ((unsigned char)(p) != (b))                                                      \
                match = 0;                                                                      \
            break;                                                                              \
                                                                                                \
        case CLI_MATCH_NOCASE:                                                                  \
            if ((unsigned char)((p) & 0xff) != CLI_NOCASE(b))                                   \
                match = 0;                                                                      \
            break;                                                                              \
                                                                                                \
        case CLI_MATCH_IGNORE:                                                                  \
            break;                                                                              \
                                                                                                \
        case CLI_MATCH_SPECIAL:                                                                 \
            /* >1 = movement, 0 = fail, <1 = resolved in branch */                              \
            if ((match = ac_findmatch_special(buffer, offset, bp, fileoffset, length,           \
                                              pattern, i, specialcnt, start, end, (rev))) <= 0) \
                return match;                                                                   \
                                                                                                \
            if (!(rev)) {                                                                       \
                bp += (match - 1); /* -1 is for bp++ in parent loop */                          \
                specialcnt++;                                                                   \
            } else {                                                                            \
                bp = bp + 1 - match; /* +1 is for bp-- in parent loop */                        \
                specialcnt--;                                                                   \
            }                                                                                   \
                                                                                                \
            break;                                                                              \
                                                                                                \
        case CLI_MATCH_NIBBLE_HIGH:                                                             \
            if ((unsigned char)((p) & 0x00f0) != ((b) & 0xf0))                                  \
                match = 0;                                                                      \
            break;                                                                              \
                                                                                                \
        case CLI_MATCH_NIBBLE_LOW:                                                              \
            if ((unsigned char)((p) & 0x000f) != ((b) & 0x0f))                                  \
                match = 0;                                                                      \
            break;                                                                              \
                                                                                                \
        default:                                                                                \
            cli_errmsg("ac_findmatch: Unknown metatype 0x%x\n", wc);                            \
            match = 0;                                                                          \
    }
2015-05-06 13:34:15 -04:00
/* special handler */
2015-07-06 19:05:36 -04:00
inline static int ac_findmatch_special ( const unsigned char * buffer , uint32_t offset , uint32_t bp , uint32_t fileoffset , uint32_t length ,
const struct cli_ac_patt * pattern , uint32_t pp , uint16_t specialcnt , uint32_t * start , uint32_t * end , int rev )
2015-05-06 13:34:15 -04:00
{
2015-05-13 17:48:22 -04:00
int match , cmp ;
2015-05-22 16:18:36 -04:00
uint16_t j , b = buffer [ bp ] ;
2015-05-18 09:59:04 -04:00
uint16_t wc ;
2015-07-06 19:05:36 -04:00
uint32_t subbp ;
2015-05-06 13:34:15 -04:00
struct cli_ac_special * special = pattern - > special_table [ specialcnt ] ;
2018-12-03 12:40:13 -05:00
struct cli_alt_node * alt = NULL ;
2015-05-06 13:34:15 -04:00
match = special - > negative ;
2018-12-03 12:40:13 -05:00
switch ( special - > type ) {
case AC_SPECIAL_ALT_CHAR : /* single-byte */
for ( j = 0 ; j < special - > num ; j + + ) {
cmp = b - ( special - > alt ) . byte [ j ] ;
if ( cmp = = 0 ) {
match = ! special - > negative ;
break ;
} else if ( cmp < 0 )
break ;
}
break ;
2015-05-11 11:55:43 -04:00
2018-12-03 12:40:13 -05:00
case AC_SPECIAL_ALT_STR_FIXED : /* fixed length multi-byte */
2015-07-06 19:05:36 -04:00
if ( ! rev ) {
2018-12-03 12:40:13 -05:00
if ( bp + special - > len [ 0 ] > length )
break ;
2015-07-06 19:05:36 -04:00
subbp = bp ;
} else {
2022-06-04 12:08:51 -07:00
if ( bp < ( uint32_t ) ( special - > len [ 0 ] - 1 ) )
2018-12-03 12:40:13 -05:00
break ;
2022-06-04 12:08:51 -07:00
subbp = bp - ( uint32_t ) ( special - > len [ 0 ] - 1 ) ;
2015-05-14 12:23:56 -04:00
}
2018-12-03 12:40:13 -05:00
match * = special - > len [ 0 ] ;
for ( j = 0 ; j < special - > num ; j + + ) {
cmp = memcmp ( & buffer [ subbp ] , ( special - > alt ) . f_str [ j ] , special - > len [ 0 ] ) ;
if ( cmp = = 0 ) {
match = ( ! special - > negative ) * special - > len [ 0 ] ;
2015-05-18 09:59:04 -04:00
break ;
2018-12-03 12:40:13 -05:00
} else if ( cmp < 0 )
2015-05-21 18:46:34 -04:00
break ;
2015-05-18 09:59:04 -04:00
}
2018-12-03 12:40:13 -05:00
break ;
2015-05-14 12:23:56 -04:00
2018-12-03 12:40:13 -05:00
case AC_SPECIAL_ALT_STR : /* generic */
alt = ( special - > alt ) . v_str ;
while ( alt ) {
if ( ! rev ) {
if ( bp + alt - > len > length ) {
alt = alt - > next ;
continue ;
}
subbp = bp ;
} else {
2022-06-04 12:08:51 -07:00
if ( bp < ( uint32_t ) ( alt - > len - 1 ) ) {
2018-12-03 12:40:13 -05:00
alt = alt - > next ;
continue ;
}
2022-06-04 12:08:51 -07:00
subbp = bp - ( uint32_t ) ( alt - > len - 1 ) ;
2018-12-03 12:40:13 -05:00
}
/* note that generic alternates CANNOT be negated */
match = 1 ;
for ( j = 0 ; j < alt - > len ; j + + ) {
AC_MATCH_CHAR2 ( alt - > str [ j ] , buffer [ subbp + j ] ) ;
if ( ! match )
break ;
}
if ( match ) {
/* if match is unique (has no derivatives), we can pass it directly back */
if ( alt - > unique ) {
match = alt - > len ;
break ;
}
/* branch for backtracking */
if ( ! rev )
match = ac_forward_match_branch ( buffer , subbp + alt - > len , offset , fileoffset , length , pattern , pp + 1 , specialcnt + 1 , start , end ) ;
else
match = ac_backward_match_branch ( buffer , subbp - 1 , offset , fileoffset , length , pattern , pp - 1 , specialcnt - 1 , start , end ) ;
if ( match )
return - 1 ; /* alerts caller that match has been resolved in child callee */
}
alt = alt - > next ;
}
break ;
2015-05-06 13:34:15 -04:00
2018-12-03 12:40:13 -05:00
case AC_SPECIAL_LINE_MARKER :
if ( b = = ' \n ' )
match = ! special - > negative ;
else if ( b = = ' \r ' & & ( bp + 1 < length & & buffer [ bp + 1 ] = = ' \n ' ) )
match = ( ! special - > negative ) * 2 ;
break ;
2015-05-06 13:34:15 -04:00
2018-12-03 12:40:13 -05:00
case AC_SPECIAL_BOUNDARY :
if ( boundary [ b ] )
match = ! special - > negative ;
break ;
2015-05-06 13:34:15 -04:00
2018-12-03 12:40:13 -05:00
case AC_SPECIAL_WORD_MARKER :
if ( ! isalnum ( b ) )
match = ! special - > negative ;
break ;
2015-05-06 13:34:15 -04:00
2018-12-03 12:40:13 -05:00
default :
cli_errmsg ( " ac_findmatch: Unknown special \n " ) ;
match = 0 ;
2015-05-06 13:34:15 -04:00
}
return match ;
}
/* state should reset on call, recursion depth = number of alternate specials */
2015-07-06 19:05:36 -04:00
/* each loop iteration starts on the NEXT sequence to be validated */
static int ac_backward_match_branch ( const unsigned char * buffer , uint32_t bp , uint32_t offset , uint32_t fileoffset , uint32_t length ,
const struct cli_ac_patt * pattern , uint32_t pp , uint16_t specialcnt , uint32_t * start , uint32_t * end )
2015-05-06 13:34:15 -04:00
{
2017-12-20 08:27:21 -05:00
int match = 0 ;
2015-05-06 13:34:15 -04:00
uint16_t wc , i ;
2015-07-06 19:05:36 -04:00
uint32_t filestart ;
2007-05-02 09:17:46 +00:00
2015-07-06 19:05:36 -04:00
/* backwards (prefix) validation, determines start */
2018-12-03 12:40:13 -05:00
if ( pattern - > prefix & & pattern - > prefix_length [ 0 ] ) {
2015-07-06 19:05:36 -04:00
match = 1 ;
2014-11-11 16:48:19 -05:00
2015-07-06 19:05:36 -04:00
for ( i = pp ; 1 ; i - - ) {
2018-12-03 12:40:13 -05:00
AC_MATCH_CHAR ( pattern - > prefix [ i ] , buffer [ bp ] , 1 ) ;
if ( ! match )
2015-07-06 19:05:36 -04:00
return 0 ;
/* needs to perform check before decrement due to unsignedness */
if ( i = = 0 | | bp = = 0 )
break ;
bp - - ;
}
2018-12-03 12:40:13 -05:00
* start = bp ;
2015-07-06 19:05:36 -04:00
filestart = fileoffset - offset + bp ;
} else {
/* bp is set to buffer offset */
* start = bp = offset ;
2018-12-03 12:40:13 -05:00
filestart = fileoffset ;
2004-07-19 17:54:40 +00:00
}
2015-07-06 19:05:36 -04:00
/* left-side special checks, bp = start */
2018-12-03 12:40:13 -05:00
if ( pattern - > boundary & AC_BOUNDARY_LEFT ) {
2014-11-11 16:48:19 -05:00
match = ! ! ( pattern - > boundary & AC_BOUNDARY_LEFT_NEGATIVE ) ;
2018-12-03 12:40:13 -05:00
if ( ! filestart | | ( bp & & ( boundary [ buffer [ bp - 1 ] ] = = 1 | | boundary [ buffer [ bp - 1 ] ] = = 3 ) ) )
2014-11-11 16:48:19 -05:00
match = ! match ;
2018-12-03 12:40:13 -05:00
if ( ! match )
2014-11-11 16:48:19 -05:00
return 0 ;
2009-09-17 22:49:45 +02:00
}
2018-12-03 12:40:13 -05:00
if ( pattern - > boundary & AC_LINE_MARKER_LEFT ) {
2015-07-06 19:05:36 -04:00
match = ! ! ( pattern - > boundary & AC_LINE_MARKER_LEFT_NEGATIVE ) ;
2018-12-03 12:40:13 -05:00
if ( ! filestart | | ( bp & & ( buffer [ bp - 1 ] = = ' \n ' ) ) )
2014-11-11 16:48:19 -05:00
match = ! match ;
2018-12-03 12:40:13 -05:00
if ( ! match )
2014-11-11 16:48:19 -05:00
return 0 ;
2009-09-17 22:49:45 +02:00
}
2018-12-03 12:40:13 -05:00
if ( pattern - > boundary & AC_WORD_MARKER_LEFT ) {
2015-07-06 19:05:36 -04:00
match = ! ! ( pattern - > boundary & AC_WORD_MARKER_LEFT_NEGATIVE ) ;
2018-12-03 12:40:13 -05:00
if ( ! filestart )
2015-07-06 19:05:36 -04:00
match = ! match ;
2018-12-03 12:40:13 -05:00
else if ( pattern - > sigopts & ACPATT_OPTION_WIDE ) {
if ( filestart - 1 = = 0 )
2015-07-06 19:05:36 -04:00
match = ! match ;
2018-12-03 12:40:13 -05:00
if ( bp - 1 & & bp & & ! ( isalnum ( buffer [ bp - 2 ] ) & & buffer [ bp - 1 ] = = ' \0 ' ) )
2015-07-06 19:05:36 -04:00
match = ! match ;
2018-12-03 12:40:13 -05:00
} else if ( bp & & ! isalnum ( buffer [ bp - 1 ] ) )
2014-11-11 16:48:19 -05:00
match = ! match ;
2018-12-03 12:40:13 -05:00
if ( ! match )
2014-11-11 16:48:19 -05:00
return 0 ;
2009-09-25 10:38:10 +02:00
}
2015-07-06 19:05:36 -04:00
/* bp is shifted for left anchor check, thus invalidated as pattern start */
2018-12-03 12:40:13 -05:00
if ( ! ( pattern - > ch [ 0 ] & CLI_MATCH_IGNORE ) ) {
if ( pattern - > ch_mindist [ 0 ] + ( uint32_t ) 1 > bp )
2015-07-06 19:05:36 -04:00
return 0 ;
bp - = pattern - > ch_mindist [ 0 ] + 1 ;
2018-12-03 12:40:13 -05:00
for ( i = pattern - > ch_mindist [ 0 ] ; i < = pattern - > ch_maxdist [ 0 ] ; i + + ) {
2015-07-06 19:05:36 -04:00
match = 1 ;
2018-12-03 12:40:13 -05:00
AC_MATCH_CHAR ( pattern - > ch [ 0 ] , buffer [ bp ] , 1 ) ;
if ( match )
2015-07-06 19:05:36 -04:00
break ;
2014-11-11 16:48:19 -05:00
2018-12-03 12:40:13 -05:00
if ( ! bp )
2015-07-06 19:05:36 -04:00
return 0 ;
else
bp - - ;
}
2018-12-03 12:40:13 -05:00
if ( ! match )
2014-11-11 16:48:19 -05:00
return 0 ;
2009-09-25 10:38:10 +02:00
}
2015-07-06 19:05:36 -04:00
return 1 ;
}
/* state should reset on call, recursion depth = number of alternate specials */
/* each loop iteration starts on the NEXT sequence to validate */
static int ac_forward_match_branch ( const unsigned char * buffer , uint32_t bp , uint32_t offset , uint32_t fileoffset , uint32_t length ,
const struct cli_ac_patt * pattern , uint32_t pp , uint16_t specialcnt , uint32_t * start , uint32_t * end )
{
int match ;
uint16_t wc , i ;
match = 1 ;
/* forward (pattern) validation; determines end */
2018-12-03 12:40:13 -05:00
for ( i = pp ; i < pattern - > length [ 0 ] & & bp < length ; i + + ) {
AC_MATCH_CHAR ( pattern - > pattern [ i ] , buffer [ bp ] , 0 ) ;
2015-07-06 19:05:36 -04:00
if ( ! match )
return 0 ;
bp + + ;
}
* end = bp ;
/* right-side special checks, bp = end */
2018-12-03 12:40:13 -05:00
if ( pattern - > boundary & AC_BOUNDARY_RIGHT ) {
2015-07-06 19:05:36 -04:00
match = ! ! ( pattern - > boundary & AC_BOUNDARY_RIGHT_NEGATIVE ) ;
2018-12-03 12:40:13 -05:00
if ( ( length < = SCANBUFF ) & & ( bp = = length | | boundary [ buffer [ bp ] ] > = 2 ) )
2015-02-26 11:21:34 -05:00
match = ! match ;
2015-07-06 19:05:36 -04:00
2018-12-03 12:40:13 -05:00
if ( ! match )
2015-07-06 19:05:36 -04:00
return 0 ;
}
2018-12-03 12:40:13 -05:00
if ( pattern - > boundary & AC_LINE_MARKER_RIGHT ) {
2015-07-06 19:05:36 -04:00
match = ! ! ( pattern - > boundary & AC_LINE_MARKER_RIGHT_NEGATIVE ) ;
2018-12-03 12:40:13 -05:00
if ( ( length < = SCANBUFF ) & & ( bp = = length | | buffer [ bp ] = = ' \n ' | | ( buffer [ bp ] = = ' \r ' & & bp + 1 < length & & buffer [ bp + 1 ] = = ' \n ' ) ) )
2015-02-20 15:40:36 -05:00
match = ! match ;
2018-12-03 12:40:13 -05:00
if ( ! match )
2015-02-20 15:40:36 -05:00
return 0 ;
}
2018-12-03 12:40:13 -05:00
if ( pattern - > boundary & AC_WORD_MARKER_RIGHT ) {
2015-02-20 15:40:36 -05:00
match = ! ! ( pattern - > boundary & AC_WORD_MARKER_RIGHT_NEGATIVE ) ;
2018-12-03 12:40:13 -05:00
if ( length < = SCANBUFF ) {
if ( bp = = length )
2015-02-26 11:21:34 -05:00
match = ! match ;
2018-12-03 12:40:13 -05:00
else if ( ( pattern - > sigopts & ACPATT_OPTION_WIDE ) & & ( bp + 1 < length ) ) {
if ( ! ( isalnum ( buffer [ bp ] ) & & buffer [ bp + 1 ] = = ' \0 ' ) )
2015-02-26 11:21:34 -05:00
match = ! match ;
2018-12-03 12:40:13 -05:00
} else if ( ! isalnum ( buffer [ bp ] ) )
2015-02-26 11:21:34 -05:00
match = ! match ;
}
2015-02-20 15:40:36 -05:00
2018-12-03 12:40:13 -05:00
if ( ! match )
2015-02-20 15:40:36 -05:00
return 0 ;
}
2015-07-06 19:05:36 -04:00
/* bp is shifted for right anchor check, thus invalidated as pattern right-side */
2018-12-03 12:40:13 -05:00
if ( ! ( pattern - > ch [ 1 ] & CLI_MATCH_IGNORE ) ) {
2014-11-11 16:48:19 -05:00
bp + = pattern - > ch_mindist [ 1 ] ;
2018-12-03 12:40:13 -05:00
for ( i = pattern - > ch_mindist [ 1 ] ; i < = pattern - > ch_maxdist [ 1 ] ; i + + ) {
if ( bp > = length )
2014-11-11 16:48:19 -05:00
return 0 ;
match = 1 ;
2018-12-03 12:40:13 -05:00
AC_MATCH_CHAR ( pattern - > ch [ 1 ] , buffer [ bp ] , 0 ) ;
if ( match )
2014-11-11 16:48:19 -05:00
break ;
bp + + ;
}
2018-12-03 12:40:13 -05:00
if ( ! match )
2014-11-11 16:48:19 -05:00
return 0 ;
2008-02-06 12:26:16 +00:00
}
2018-12-03 12:40:13 -05:00
return ac_backward_match_branch ( buffer , offset - 1 , offset , fileoffset , length , pattern , pattern - > prefix_length [ 0 ] - 1 , pattern - > special_pattern - 1 , start , end ) ;
2004-07-19 17:54:40 +00:00
}
2015-07-06 19:05:36 -04:00
inline static int ac_findmatch ( const unsigned char * buffer , uint32_t offset , uint32_t fileoffset , uint32_t length , const struct cli_ac_patt * pattern , uint32_t * start , uint32_t * end )
2015-05-06 13:34:15 -04:00
{
int match ;
uint16_t specialcnt = pattern - > special_pattern ;
2015-07-06 19:05:36 -04:00
/* minimal check as the maximum variable length may exceed the buffer */
2018-12-03 12:40:13 -05:00
if ( ( offset + pattern - > length [ 1 ] > length ) | | ( pattern - > prefix_length [ 1 ] > offset ) )
2015-05-06 13:34:15 -04:00
return 0 ;
2018-12-03 12:40:13 -05:00
match = ac_forward_match_branch ( buffer , offset + pattern - > depth , offset , fileoffset , length , pattern , pattern - > depth , specialcnt , start , end ) ;
if ( match )
2015-05-14 12:23:56 -04:00
return 1 ;
2015-05-06 13:34:15 -04:00
return 0 ;
}
2019-02-27 00:47:38 -05:00
cl_error_t cli_ac_initdata ( struct cli_ac_data * data , uint32_t partsigs , uint32_t lsigs , uint32_t reloffsigs , uint8_t tracklen )
2006-11-15 15:26:54 +00:00
{
2014-11-11 16:48:19 -05:00
unsigned int i , j ;
2008-07-25 19:00:25 +00:00
2014-07-10 18:11:49 -04:00
UNUSEDPARAM ( tracklen ) ;
2006-11-15 15:26:54 +00:00
2018-12-03 12:40:13 -05:00
if ( ! data ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_init: data == NULL \n " ) ;
return CL_ENULLARG ;
2006-11-15 15:26:54 +00:00
}
2013-12-05 15:09:19 -08:00
memset ( ( void * ) data , 0 , sizeof ( struct cli_ac_data ) ) ;
2006-11-15 15:26:54 +00:00
2009-08-21 15:55:10 +02:00
data - > reloffsigs = reloffsigs ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs ) {
2024-01-09 17:17:48 -05:00
data - > offset = ( uint32_t * ) malloc ( reloffsigs * 2 * sizeof ( uint32_t ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > offset ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->offset \n " ) ;
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < reloffsigs * 2 ; i + = 2 )
2014-11-11 16:48:19 -05:00
data - > offset [ i ] = CLI_OFF_NONE ;
2009-08-21 15:55:10 +02:00
}
2006-11-15 15:26:54 +00:00
2009-08-21 15:55:10 +02:00
data - > partsigs = partsigs ;
2018-12-03 12:40:13 -05:00
if ( partsigs ) {
2024-01-09 17:17:48 -05:00
data - > offmatrix = ( uint32_t * * * ) calloc ( partsigs , sizeof ( uint32_t * * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > offmatrix ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->offmatrix \n " ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offset ) ;
return CL_EMEM ;
}
2006-11-15 15:26:54 +00:00
}
2018-12-03 12:40:13 -05:00
2008-07-25 19:00:25 +00:00
data - > lsigs = lsigs ;
2018-12-03 12:40:13 -05:00
if ( lsigs ) {
2024-01-09 17:17:48 -05:00
data - > lsigcnt = ( uint32_t * * ) malloc ( lsigs * sizeof ( uint32_t * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > lsigcnt ) {
if ( partsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offmatrix ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offset ) ;
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->lsigcnt \n " ) ;
return CL_EMEM ;
}
2024-01-09 17:17:48 -05:00
data - > lsigcnt [ 0 ] = ( uint32_t * ) calloc ( lsigs * 64 , sizeof ( uint32_t ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > lsigcnt [ 0 ] ) {
2014-11-11 16:48:19 -05:00
free ( data - > lsigcnt ) ;
2018-12-03 12:40:13 -05:00
if ( partsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offmatrix ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offset ) ;
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->lsigcnt[0] \n " ) ;
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
for ( i = 1 ; i < lsigs ; i + + )
2014-11-11 16:48:19 -05:00
data - > lsigcnt [ i ] = data - > lsigcnt [ 0 ] + 64 * i ;
2024-01-09 17:17:48 -05:00
data - > yr_matches = ( uint8_t * ) calloc ( lsigs , sizeof ( uint8_t ) ) ;
2015-06-19 16:33:59 -04:00
if ( data - > yr_matches = = NULL ) {
free ( data - > lsigcnt [ 0 ] ) ;
free ( data - > lsigcnt ) ;
2018-12-03 12:40:13 -05:00
if ( partsigs )
2015-06-19 16:33:59 -04:00
free ( data - > offmatrix ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2015-06-19 16:33:59 -04:00
free ( data - > offset ) ;
return CL_EMEM ;
}
2014-11-11 16:48:19 -05:00
/* subsig offsets */
2024-01-09 17:17:48 -05:00
data - > lsig_matches = ( struct cli_lsig_matches * * ) calloc ( lsigs , sizeof ( struct cli_lsig_matches * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > lsig_matches ) {
2015-06-19 16:33:59 -04:00
free ( data - > yr_matches ) ;
2015-03-30 17:12:01 -04:00
free ( data - > lsigcnt [ 0 ] ) ;
free ( data - > lsigcnt ) ;
2018-12-03 12:40:13 -05:00
if ( partsigs )
2015-03-30 17:12:01 -04:00
free ( data - > offmatrix ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2015-03-30 17:12:01 -04:00
free ( data - > offset ) ;
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->lsig_matches \n " ) ;
return CL_EMEM ;
}
2024-01-09 17:17:48 -05:00
data - > lsigsuboff_last = ( uint32_t * * ) malloc ( lsigs * sizeof ( uint32_t * ) ) ;
data - > lsigsuboff_first = ( uint32_t * * ) malloc ( lsigs * sizeof ( uint32_t * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > lsigsuboff_last | | ! data - > lsigsuboff_first ) {
2015-03-30 17:12:01 -04:00
free ( data - > lsig_matches ) ;
2014-11-11 16:48:19 -05:00
free ( data - > lsigsuboff_last ) ;
free ( data - > lsigsuboff_first ) ;
2015-06-19 16:33:59 -04:00
free ( data - > yr_matches ) ;
2014-11-11 16:48:19 -05:00
free ( data - > lsigcnt [ 0 ] ) ;
free ( data - > lsigcnt ) ;
2018-12-03 12:40:13 -05:00
if ( partsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offmatrix ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offset ) ;
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->lsigsuboff_(last|first) \n " ) ;
return CL_EMEM ;
}
2024-01-09 17:17:48 -05:00
data - > lsigsuboff_last [ 0 ] = ( uint32_t * ) calloc ( lsigs * 64 , sizeof ( uint32_t ) ) ;
data - > lsigsuboff_first [ 0 ] = ( uint32_t * ) calloc ( lsigs * 64 , sizeof ( uint32_t ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! data - > lsigsuboff_last [ 0 ] | | ! data - > lsigsuboff_first [ 0 ] ) {
2015-03-30 17:12:01 -04:00
free ( data - > lsig_matches ) ;
2014-11-11 16:48:19 -05:00
free ( data - > lsigsuboff_last [ 0 ] ) ;
free ( data - > lsigsuboff_first [ 0 ] ) ;
free ( data - > lsigsuboff_last ) ;
free ( data - > lsigsuboff_first ) ;
2015-06-19 16:33:59 -04:00
free ( data - > yr_matches ) ;
2014-11-11 16:48:19 -05:00
free ( data - > lsigcnt [ 0 ] ) ;
free ( data - > lsigcnt ) ;
2018-12-03 12:40:13 -05:00
if ( partsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offmatrix ) ;
2018-12-03 12:40:13 -05:00
if ( reloffsigs )
2014-11-11 16:48:19 -05:00
free ( data - > offset ) ;
cli_errmsg ( " cli_ac_init: Can't allocate memory for data->lsigsuboff_(last|first)[0] \n " ) ;
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
for ( j = 0 ; j < 64 ; j + + ) {
data - > lsigsuboff_last [ 0 ] [ j ] = CLI_OFF_NONE ;
2014-11-11 16:48:19 -05:00
data - > lsigsuboff_first [ 0 ] [ j ] = CLI_OFF_NONE ;
}
2018-12-03 12:40:13 -05:00
for ( i = 1 ; i < lsigs ; i + + ) {
data - > lsigsuboff_last [ i ] = data - > lsigsuboff_last [ 0 ] + 64 * i ;
2014-11-11 16:48:19 -05:00
data - > lsigsuboff_first [ i ] = data - > lsigsuboff_first [ 0 ] + 64 * i ;
2018-12-03 12:40:13 -05:00
for ( j = 0 ; j < 64 ; j + + ) {
data - > lsigsuboff_last [ i ] [ j ] = CLI_OFF_NONE ;
2014-11-11 16:48:19 -05:00
data - > lsigsuboff_first [ i ] [ j ] = CLI_OFF_NONE ;
}
}
2009-08-21 15:55:10 +02:00
}
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 32 ; i + + )
2014-11-11 16:48:19 -05:00
data - > macro_lastmatch [ i ] = CLI_OFF_NONE ;
2009-08-21 15:55:10 +02:00
2010-12-02 18:50:53 +01:00
data - > min_partno = 1 ;
2007-04-28 18:40:59 +00:00
return CL_SUCCESS ;
}
2006-11-15 15:26:54 +00:00
2019-02-27 00:47:38 -05:00
cl_error_t cli_ac_caloff ( const struct cli_matcher * root , struct cli_ac_data * data , const struct cli_target_info * info )
2009-08-14 14:38:13 +02:00
{
2022-08-18 20:00:33 -07:00
cl_error_t ret ;
2014-11-11 16:48:19 -05:00
unsigned int i ;
struct cli_ac_patt * patt ;
2009-08-14 14:38:13 +02:00
2018-12-03 12:40:13 -05:00
if ( info )
2014-11-11 16:48:19 -05:00
data - > vinfo = & info - > exeinfo . vinfo ;
2010-01-04 14:56:04 +01:00
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < root - > ac_reloff_num ; i + + ) {
2014-11-11 16:48:19 -05:00
patt = root - > ac_reloff [ i ] ;
2018-12-03 12:40:13 -05:00
if ( ! info ) {
2014-11-11 16:48:19 -05:00
data - > offset [ patt - > offset_min ] = CLI_OFF_NONE ;
2022-08-18 20:00:33 -07:00
} else if ( CL_SUCCESS ! = ( ret = cli_caloff ( NULL , info , root - > type , patt - > offdata , & data - > offset [ patt - > offset_min ] , & data - > offset [ patt - > offset_max ] ) ) ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_caloff: Can't calculate relative offset in signature for %s \n " , patt - > virname ) ;
return ret ;
2018-12-03 12:40:13 -05:00
} else if ( ( data - > offset [ patt - > offset_min ] ! = CLI_OFF_NONE ) & & ( data - > offset [ patt - > offset_min ] + patt - > length [ 1 ] > info - > fsize ) ) {
2014-11-11 16:48:19 -05:00
data - > offset [ patt - > offset_min ] = CLI_OFF_NONE ;
}
2009-08-14 14:38:13 +02:00
}
return CL_SUCCESS ;
}
2007-04-28 18:40:59 +00:00
void cli_ac_freedata ( struct cli_ac_data * data )
{
2014-11-11 16:48:19 -05:00
uint32_t i ;
2006-11-15 15:26:54 +00:00
2015-03-30 17:12:01 -04:00
if ( ! data )
return ;
2018-12-03 12:40:13 -05:00
if ( data - > partsigs ) {
for ( i = 0 ; i < data - > partsigs ; i + + ) {
if ( data - > offmatrix [ i ] ) {
2014-11-11 16:48:19 -05:00
free ( data - > offmatrix [ i ] [ 0 ] ) ;
free ( data - > offmatrix [ i ] ) ;
}
}
free ( data - > offmatrix ) ;
data - > offmatrix = NULL ;
2018-12-03 12:40:13 -05:00
data - > partsigs = 0 ;
2008-07-25 19:00:25 +00:00
}
2018-12-03 12:40:13 -05:00
if ( data - > lsigs ) {
2015-03-30 17:12:01 -04:00
if ( data - > lsig_matches ) {
for ( i = 0 ; i < data - > lsigs ; i + + ) {
2018-12-03 12:40:13 -05:00
struct cli_lsig_matches * ls_matches ;
2015-03-30 17:12:01 -04:00
if ( ( ls_matches = data - > lsig_matches [ i ] ) ) {
uint32_t j ;
for ( j = 0 ; j < ls_matches - > subsigs ; j + + ) {
if ( ls_matches - > matches [ j ] ) {
free ( ls_matches - > matches [ j ] ) ;
ls_matches - > matches [ j ] = 0 ;
}
}
free ( data - > lsig_matches [ i ] ) ;
data - > lsig_matches [ i ] = 0 ;
}
}
free ( data - > lsig_matches ) ;
data - > lsig_matches = 0 ;
}
2015-06-19 16:33:59 -04:00
free ( data - > yr_matches ) ;
2014-11-11 16:48:19 -05:00
free ( data - > lsigcnt [ 0 ] ) ;
free ( data - > lsigcnt ) ;
free ( data - > lsigsuboff_last [ 0 ] ) ;
free ( data - > lsigsuboff_last ) ;
free ( data - > lsigsuboff_first [ 0 ] ) ;
free ( data - > lsigsuboff_first ) ;
data - > lsigs = 0 ;
2006-11-18 20:49:08 +00:00
}
2009-08-21 15:55:10 +02:00
2018-12-03 12:40:13 -05:00
if ( data - > reloffsigs ) {
2014-11-11 16:48:19 -05:00
free ( data - > offset ) ;
data - > reloffsigs = 0 ;
2009-08-21 15:55:10 +02:00
}
2007-04-28 18:40:59 +00:00
}
2006-11-18 20:49:08 +00:00
Increase limit for finding PE files embedded in other PE files
I am seeing missed detections since we changed to prohibit embedded
file type identification when inside an embedded file.
In particular, I'm seeing this issue with PE files that contain multiple
other MSEXE as well as a variety of false positives for PE file headers.
For example, imagine a PE with four concatenated DLL's, like so:
```
[ EXE file | DLL #1 | DLL #2 | DLL #3 | DLL #4 ]
```
And note that false positives for embedded MSEXE files are fairly common.
So there may be a few mixed in there.
Before limiting embedded file identification we might interpret the file
structure something like this:
```
MSEXE: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: DLL #1: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: DLL #2: {
embedded MSEXE #1: DLL #3: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: false positive,
embedded MSEXE #5: DLL #4
}
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: false positive,
embedded MSEXE #5: false positive,
embedded MSEXE #6: DLL #4
}
embedded MSEXE #3: DLL #3,
embedded MSEXE #4: false positive,
embedded MSEXE #5: false positive,
embedded MSEXE #6: false positive,
embedded MSEXE #7: false positive,
embedded MSEXE #8: DLL #4
}
}
```
This is obviously terrible, which is why we don't allow detecting
embedded files within other embedded files.
So after we enforce that limit, the same file may be interpreted like
this instead:
```
MSEXE: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: DLL #1,
embedded MSEXE #5: false positive,
embedded MSEXE #6: DLL #2,
embedded MSEXE #7: DLL #3,
embedded MSEXE #8: false positive,
embedded MSEXE #9: false positive,
embedded MSEXE #10: false positive,
embedded MSEXE #11: false positive,
embedded MSEXE #12: DLL #4
}
```
That's great! Except that we now exceed the "MAX_EMBEDDED_OBJ" limit
for embedded type matches (limit 10, but 12 found). That means we won't
see or extract the 4th DLL anymore.
My solution is to lift the limit when adding a matched MSEXE type.
We already do this for matched ZIPSFX types.
While doing this, I've significantly tidied up the limits checks to
make them more readable, and removed duplicate checks from within the
`ac_addtype()` function.
CLAM-2897
2025-10-12 16:05:17 -04:00
/**
 * @brief Add a match for an object type to the list of matched types.
 *
 * The new node is appended to the end of the list and the match counter kept
 * in the list head ((*list)->cnt) is incremented.
 *
 * Important: The caller is responsible for checking limits!
 *
 * @param list      Pointer to the list of matched types. *list may be NULL if no types have been added yet.
 * @param type      The type of the embedded object.
 * @param offset    The offset of the embedded object.
 * @param ctx       The context information. May be NULL. Currently unused by this function.
 * @return cl_error_t   CL_SUCCESS if the node was added, or CL_EMEM if memory allocation failed.
 */
inline static cl_error_t ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx)
{
    struct cli_matched_type *tnode;

    (void)ctx; /* kept for interface compatibility */

    tnode = calloc(1, sizeof(struct cli_matched_type));
    if (NULL == tnode) {
        cli_errmsg("ac_addtype: Can't allocate memory for new type node\n");
        return CL_EMEM;
    }

    tnode->type   = type;
    tnode->offset = offset;

    if (*list) {
        // Add to end of existing list.
        struct cli_matched_type *tnode_last = *list;

        while (tnode_last->next) {
            tnode_last = tnode_last->next;
        }

        tnode_last->next = tnode;
    } else {
        // First type in the list.
        *list = tnode;
    }

    (*list)->cnt++;

    if (UNLIKELY(cli_get_debug_flag())) {
        /* off_t is signed and STDi64 is a signed conversion, so cast to int64_t. */
        cli_dbgmsg("ac_addtype: added %s embedded object at offset " STDi64 ". Embedded object count: %d\n", cli_ftname(type), (int64_t)offset, (*list)->cnt);
    }

    return CL_SUCCESS;
}
2021-04-21 16:24:24 -07:00
void lsig_increment_subsig_match ( struct cli_ac_data * mdata , uint32_t lsig_id , uint32_t subsig_id )
{
mdata - > lsigcnt [ lsig_id ] [ subsig_id ] + + ;
}
2022-02-12 14:53:44 -08:00
cl_error_t lsig_sub_matched ( const struct cli_matcher * root , struct cli_ac_data * mdata , uint32_t lsig_id , uint32_t subsig_id , uint32_t realoff , int partial )
2010-02-08 13:45:03 +02:00
{
2022-02-12 14:53:44 -08:00
const struct cli_ac_lsig * ac_lsig = root - > ac_lsigtable [ lsig_id ] ;
2018-12-03 12:40:13 -05:00
const struct cli_lsig_tdb * tdb = & ac_lsig - > tdb ;
2010-07-01 18:24:17 +02:00
2018-12-03 12:40:13 -05:00
if ( realoff ! = CLI_OFF_NONE ) {
2022-02-12 14:53:44 -08:00
if ( mdata - > lsigsuboff_first [ lsig_id ] [ subsig_id ] = = CLI_OFF_NONE ) {
/* If this is the first subsig in the lsig, store the offset in the first-list. */
mdata - > lsigsuboff_first [ lsig_id ] [ subsig_id ] = realoff ;
}
2014-11-11 16:48:19 -05:00
2022-02-12 14:53:44 -08:00
if ( mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id ] ! = CLI_OFF_NONE & &
/* If this isn't the first subsig match for this logical sig and the offset
is earlier in the file than the last subsig match , don ' t count it . */
( ( ! partial & & realoff < = mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id ] ) | |
( partial & & realoff < mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id ] ) ) ) {
2015-03-30 17:12:01 -04:00
return CL_SUCCESS ;
2022-02-12 14:53:44 -08:00
}
2014-11-11 16:48:19 -05:00
2022-02-12 14:53:44 -08:00
/* Increment the subsig count for this logical signature */
mdata - > lsigcnt [ lsig_id ] [ subsig_id ] + + ;
2010-05-04 16:48:54 +02:00
2022-02-12 14:53:44 -08:00
if ( mdata - > lsigcnt [ lsig_id ] [ subsig_id ] < = 1 | | ! tdb - > macro_ptids | | ! tdb - > macro_ptids [ subsig_id ] ) {
/* Store the offset of this subsig match in the last-list (except in certain circumstances) */
mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id ] = realoff ;
}
2015-03-30 17:12:01 -04:00
2022-02-12 14:53:44 -08:00
if ( ac_lsig - > type & CLI_YARA_OFFSET ) {
/*
* There are 3 types of logical signatures : normal , yara - normal , and yara - offset
*
* For yara - offset logical signatures we allocate some structures to
* store yara subsignature match offsets .
*/
struct cli_subsig_matches * ss_matches ;
struct cli_lsig_matches * ls_matches ;
cli_dbgmsg ( " lsig_sub_matched lsig %u:%u at %u \n " , lsig_id , subsig_id , realoff ) ;
ls_matches = mdata - > lsig_matches [ lsig_id ] ;
if ( ls_matches = = NULL ) { /* allocate cli_lsig_matches */
2024-01-09 17:17:48 -05:00
ls_matches = mdata - > lsig_matches [ lsig_id ] = ( struct cli_lsig_matches * ) calloc ( 1 , sizeof ( struct cli_lsig_matches ) +
( ac_lsig - > tdb . subsigs - 1 ) * sizeof ( struct cli_subsig_matches * ) ) ;
2022-02-12 14:53:44 -08:00
if ( ls_matches = = NULL ) {
2024-01-09 17:17:48 -05:00
cli_errmsg ( " lsig_sub_matched: calloc failed for cli_lsig_matches \n " ) ;
2022-02-12 14:53:44 -08:00
return CL_EMEM ;
}
ls_matches - > subsigs = ac_lsig - > tdb . subsigs ;
2015-03-30 17:12:01 -04:00
}
2022-02-12 14:53:44 -08:00
ss_matches = ls_matches - > matches [ subsig_id ] ;
if ( ss_matches = = NULL ) { /* allocate cli_subsig_matches */
2022-05-08 14:59:09 -07:00
ss_matches = ls_matches - > matches [ subsig_id ] = malloc ( sizeof ( struct cli_subsig_matches ) ) ;
2022-02-12 14:53:44 -08:00
if ( ss_matches = = NULL ) {
2024-01-09 17:17:48 -05:00
cli_errmsg ( " lsig_sub_matched: malloc failed for cli_subsig_matches struct \n " ) ;
2022-02-12 14:53:44 -08:00
return CL_EMEM ;
}
ss_matches - > next = 0 ;
ss_matches - > last = sizeof ( ss_matches - > offsets ) / sizeof ( uint32_t ) - 1 ;
2015-03-30 17:12:01 -04:00
}
2022-02-12 14:53:44 -08:00
if ( ss_matches - > next > ss_matches - > last ) { /* cli_matches out of space? realloc */
2024-01-09 17:17:48 -05:00
ss_matches = ls_matches - > matches [ subsig_id ] = realloc ( ss_matches , sizeof ( struct cli_subsig_matches ) + sizeof ( uint32_t ) * ss_matches - > last * 2 ) ;
2022-02-12 14:53:44 -08:00
if ( ss_matches = = NULL ) {
2024-01-09 17:17:48 -05:00
cli_errmsg ( " lsig_sub_matched: realloc failed for cli_subsig_matches struct \n " ) ;
2022-02-12 14:53:44 -08:00
return CL_EMEM ;
}
ss_matches - > last = sizeof ( ss_matches - > offsets ) / sizeof ( uint32_t ) + ss_matches - > last * 2 - 1 ;
2015-03-30 17:12:01 -04:00
}
2018-12-03 12:40:13 -05:00
2022-02-12 14:53:44 -08:00
ss_matches - > offsets [ ss_matches - > next ] = realoff ; /* finally, store the offset */
ss_matches - > next + + ;
}
2015-03-30 17:12:01 -04:00
}
2022-02-12 14:53:44 -08:00
if ( ( tdb - > macro_ptids ! = NULL ) & &
( tdb - > macro_ptids [ subsig_id ] > 0 ) & &
( mdata - > lsigcnt [ lsig_id ] [ subsig_id ] > 1 ) ) {
/*
* This logical signature has a macro subsignature and this current subsignature has a macro following it .
*
* Check that the previous match had a macro match following it at the correct distance .
* This check is only done after the 1 st match .
*/
2014-11-11 16:48:19 -05:00
const struct cli_ac_patt * macropt ;
2022-02-12 14:53:44 -08:00
uint32_t id , last_macro_match , smin , smax , macro_group_id , last_macroprev_match ;
2014-11-11 16:48:19 -05:00
2022-02-12 14:53:44 -08:00
/*
* Look up the subsig for the upcoming macro to get anchor - min / max , and macro group id .
* Reminder : A macro subsignature takes the form :
* $ { anchor_min - anchor_max } macro_group_id $
*/
id = tdb - > macro_ptids [ subsig_id ] ;
2014-11-11 16:48:19 -05:00
2022-02-12 14:53:44 -08:00
macropt = root - > ac_pattable [ id ] ;
smin = macropt - > ch_mindist [ 0 ] ;
smax = macropt - > ch_maxdist [ 0 ] ;
macro_group_id = macropt - > sigid ;
2014-11-11 16:48:19 -05:00
/* start of last macro match */
2022-02-12 14:53:44 -08:00
last_macro_match = mdata - > macro_lastmatch [ macro_group_id ] ;
2014-11-11 16:48:19 -05:00
/* start of previous lsig subsig match */
2022-02-12 14:53:44 -08:00
last_macroprev_match = mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id ] ;
2014-11-11 16:48:19 -05:00
if ( last_macro_match = = CLI_OFF_NONE | |
last_macroprev_match + smin > last_macro_match | |
last_macroprev_match + smax < last_macro_match ) {
cli_dbgmsg ( " Canceled false lsig macro match \n " ) ;
/* Previous match was false - cancel it */
2022-02-12 14:53:44 -08:00
mdata - > lsigcnt [ lsig_id ] [ subsig_id ] - - ;
mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id ] = realoff ;
2014-11-11 16:48:19 -05:00
} else {
/* mark the macro sig itself matched */
2022-02-12 14:53:44 -08:00
cli_dbgmsg ( " Checking macro match: %u + (%u - %u) == %u \n " ,
last_macroprev_match , smin , smax , last_macro_match ) ;
mdata - > lsigcnt [ lsig_id ] [ subsig_id + 1 ] + + ;
mdata - > lsigsuboff_last [ lsig_id ] [ subsig_id + 1 ] = last_macro_match ;
2014-11-11 16:48:19 -05:00
}
2010-02-08 13:45:03 +02:00
}
2022-02-12 14:53:44 -08:00
2015-03-30 17:12:01 -04:00
return CL_SUCCESS ;
2010-02-08 13:45:03 +02:00
}
2022-02-12 14:53:44 -08:00
cl_error_t cli_ac_chkmacro ( struct cli_matcher * root , struct cli_ac_data * data , unsigned lsig_id )
2010-02-08 13:45:03 +02:00
{
2022-02-12 14:53:44 -08:00
const struct cli_lsig_tdb * tdb = & root - > ac_lsigtable [ lsig_id ] - > tdb ;
2010-02-08 13:45:03 +02:00
unsigned i ;
2021-06-18 16:26:56 -07:00
cl_error_t rc ;
2015-03-30 17:12:01 -04:00
2010-02-08 13:45:03 +02:00
/* Loop through all subsigs, and if they are tied to macros check that the
* macro matched at a correct distance */
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < tdb - > subsigs ; i + + ) {
2022-02-12 14:53:44 -08:00
rc = lsig_sub_matched ( root , data , lsig_id , i , CLI_OFF_NONE , 0 ) ;
2015-03-30 17:12:01 -04:00
if ( rc ! = CL_SUCCESS )
return rc ;
2010-02-08 13:45:03 +02:00
}
2015-03-30 17:12:01 -04:00
return CL_SUCCESS ;
2010-02-08 13:45:03 +02:00
}
2019-02-27 00:47:38 -05:00
/**
 * @brief Scan a buffer against the patterns reachable from root->ac_root.
 *
 * Each byte of `buffer` advances the current node through its `trans` table.
 * When a node flagged by IS_FINAL() is reached, the patterns on that node's
 * list (and then on its fail node's list) are verified with ac_findmatch()
 * and checked against their offset constraints. A verified match is handled
 * according to the pattern kind:
 *  - parts of a multi-part signature (pt->sigid != 0) are chained through
 *    mdata->offmatrix, which is allocated lazily per signature;
 *  - file type signatures (pt->type != 0) update the detected type and may be
 *    recorded through ac_addtype();
 *  - parts of logical signatures (pt->lsigid[0] != 0) are forwarded to
 *    lsig_sub_matched();
 *  - everything else is reported as a detection.
 *
 * @param buffer      Data to scan.
 * @param length      Number of bytes in `buffer`.
 * @param virname     If non-NULL, receives pt->virname of a detection.
 * @param customdata  If non-NULL, receives pt->customdata of a detection.
 * @param res         If non-NULL, every detection is prepended to this list
 *                    as a newly malloc'd cli_ac_result and scanning goes on.
 * @param root        Matcher holding the trie and the pattern tables.
 * @param mdata       Per-scan match state. Rejected with CL_ENULLARG when NULL
 *                    while root has ac_partsigs, ac_lsigs or ac_reloff_num.
 *                    NOTE(review): mdata->min_partno is read for every final
 *                    node without a NULL check - confirm callers always pass
 *                    a valid mdata.
 * @param offset      Added to buffer positions to form the offsets that are
 *                    compared with signature offsets and reported.
 * @param ftype       File type of the scanned data; compared with pt->rtype.
 * @param ftoffset    If non-NULL, list receiving matched embedded file types.
 * @param mode        Bit mask: AC_SCAN_VIR enables patterns without a type,
 *                    AC_SCAN_FT enables patterns with a type.
 * @param ctx         Scan context; it is NULL-checked wherever this function
 *                    dereferences it.
 *
 * @return CL_CLEAN if root->ac_root is NULL; CL_ENULLARG if mdata is required
 *         but NULL; CL_EMEM on allocation failure; CL_TYPE_IGNORED when a
 *         matching CL_TYPE_IGNORED signature applies to ftype; CL_VIRUS on a
 *         detection; a non-success status propagated from ac_addtype() or
 *         lsig_sub_matched(); otherwise the detected file type when
 *         AC_SCAN_FT is set in mode, else CL_CLEAN.
 */
cl_error_t cli_ac_scanbuff (
2018-12-03 12:40:13 -05:00
const unsigned char * buffer ,
uint32_t length ,
const char * * virname ,
void * * customdata ,
struct cli_ac_result * * res ,
const struct cli_matcher * root ,
struct cli_ac_data * mdata ,
uint32_t offset ,
cli_file_t ftype ,
struct cli_matched_type * * ftoffset ,
unsigned int mode ,
2017-09-21 13:10:01 -04:00
cli_ctx * ctx )
2004-07-19 17:54:40 +00:00
{
2014-11-11 16:48:19 -05:00
struct cli_ac_node * current ;
2015-02-10 09:23:51 -08:00
struct cli_ac_list * pattN , * ptN ;
2014-11-11 16:48:19 -05:00
struct cli_ac_patt * patt , * pt ;
2015-07-06 19:05:36 -04:00
uint32_t i , bp , exptoff [ 2 ] , realoff , matchstart , matchend ;
2014-11-11 16:48:19 -05:00
uint16_t j ;
uint8_t found , viruses_found = 0 ;
2017-09-21 13:10:01 -04:00
uint32_t * * offmatrix , swp ;
2022-02-12 14:53:44 -08:00
cli_file_t type = CL_TYPE_ANY ;
2014-11-11 16:48:19 -05:00
struct cli_ac_result * newres ;
2021-06-18 16:26:56 -07:00
cl_error_t rc ;
cl_error_t ret ;
2004-07-19 17:54:40 +00:00
2018-12-03 12:40:13 -05:00
/* Nothing to scan against when the matcher has no root node. */
if ( ! root - > ac_root )
2014-11-11 16:48:19 -05:00
return CL_CLEAN ;
2004-07-19 17:54:40 +00:00
2018-12-03 12:40:13 -05:00
if ( ! mdata & & ( root - > ac_partsigs | | root - > ac_lsigs | | root - > ac_reloff_num ) ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_scanbuff: mdata == NULL \n " ) ;
return CL_ENULLARG ;
2004-07-19 17:54:40 +00:00
}
current = root - > ac_root ;
2018-12-03 12:40:13 -05:00
/* Feed the buffer one byte at a time through the node transition tables. */
for ( i = 0 ; i < length ; i + + ) {
2015-02-09 14:22:45 -08:00
current = current - > trans [ buffer [ i ] ] ;
2014-11-11 16:48:19 -05:00
2018-12-03 12:40:13 -05:00
if ( UNLIKELY ( IS_FINAL ( current ) ) ) {
2015-02-10 09:23:51 -08:00
struct cli_ac_list * faillist = current - > fail - > list ;
2018-12-03 12:40:13 -05:00
pattN = current - > list ;
while ( pattN ) {
2015-02-09 19:28:39 -08:00
patt = pattN - > me ;
2018-12-03 12:40:13 -05:00
/* A part number above min_partno ends the walk of this list: switch
 * once to the fail node's list; the second time faillist is NULL and
 * the while loop terminates. */
if ( patt - > partno > mdata - > min_partno ) {
pattN = faillist ;
2014-11-11 16:48:19 -05:00
faillist = NULL ;
continue ;
}
/* bp: buffer position derived from the current byte index and
 * patt->depth (presumably the number of bytes consumed in the trie -
 * TODO confirm). */
bp = i + 1 - patt - > depth ;
2018-12-03 12:40:13 -05:00
/* Offset pre-filter applied before the ac_findmatch() verification
 * below; it is only used for a pattern with no next_same entry. */
if ( patt - > offdata [ 0 ] ! = CLI_OFF_VERSION & & patt - > offdata [ 0 ] ! = CLI_OFF_MACRO & & ! pattN - > next_same & & ( patt - > offset_min ! = CLI_OFF_ANY ) & & ( ! patt - > sigid | | patt - > partno = = 1 ) ) {
if ( patt - > offset_min = = CLI_OFF_NONE ) {
2015-02-09 19:28:39 -08:00
pattN = pattN - > next ;
2014-11-11 16:48:19 -05:00
continue ;
}
2015-07-06 19:05:36 -04:00
exptoff [ 0 ] = offset + bp - patt - > prefix_length [ 2 ] ; /* lower offset end */
exptoff [ 1 ] = offset + bp - patt - > prefix_length [ 1 ] ; /* higher offset end */
2018-12-03 12:40:13 -05:00
if ( patt - > offdata [ 0 ] = = CLI_OFF_ABSOLUTE ) {
if ( patt - > offset_max < exptoff [ 0 ] | | patt - > offset_min > exptoff [ 1 ] ) {
2015-02-09 19:28:39 -08:00
pattN = pattN - > next ;
2014-11-11 16:48:19 -05:00
continue ;
}
} else {
2018-12-03 12:40:13 -05:00
if ( mdata - > offset [ patt - > offset_min ] = = CLI_OFF_NONE | | mdata - > offset [ patt - > offset_max ] < exptoff [ 0 ] | | mdata - > offset [ patt - > offset_min ] > exptoff [ 1 ] ) {
2015-02-09 19:28:39 -08:00
pattN = pattN - > next ;
2014-11-11 16:48:19 -05:00
continue ;
}
}
}
2015-02-09 19:28:39 -08:00
ptN = pattN ;
2018-12-03 12:40:13 -05:00
/* One verification with patt, then every entry on the next_same chain
 * is evaluated against that result. */
if ( ac_findmatch ( buffer , bp , offset + bp , length , patt , & matchstart , & matchend ) ) {
while ( ptN ) {
2015-02-09 19:28:39 -08:00
pt = ptN - > me ;
2018-12-03 12:40:13 -05:00
if ( pt - > partno > mdata - > min_partno )
2014-11-11 16:48:19 -05:00
break ;
2018-12-03 12:40:13 -05:00
/* Skip patterns whose kind (file type vs. detection) is not enabled in mode. */
if ( ( pt - > type & & ! ( mode & AC_SCAN_FT ) ) | | ( ! pt - > type & & ! ( mode & AC_SCAN_VIR ) ) ) {
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
2015-07-06 19:05:36 -04:00
realoff = offset + matchstart ;
2018-12-03 12:40:13 -05:00
if ( pt - > offdata [ 0 ] = = CLI_OFF_VERSION ) {
2022-08-12 16:59:35 -07:00
if ( false = = cli_hashset_contains_maybe_noalloc ( mdata - > vinfo , realoff ) ) {
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
cli_dbgmsg ( " cli_ac_scanbuff: VI match for offset %x \n " , realoff ) ;
2018-12-03 12:40:13 -05:00
} else if ( pt - > offdata [ 0 ] = = CLI_OFF_MACRO ) {
2014-11-11 16:48:19 -05:00
/* A macro pattern only records where it last matched.
 * NOTE(review): the index uses patt (head of the chain) while the
 * test above uses pt - confirm this is intended. */
mdata - > macro_lastmatch [ patt - > offdata [ 1 ] ] = realoff ;
2018-12-03 12:40:13 -05:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
2018-12-03 12:40:13 -05:00
} else if ( pt - > offset_min ! = CLI_OFF_ANY & & ( ! pt - > sigid | | pt - > partno = = 1 ) ) {
if ( pt - > offset_min = = CLI_OFF_NONE ) {
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
2018-12-03 12:40:13 -05:00
if ( pt - > offdata [ 0 ] = = CLI_OFF_ABSOLUTE ) {
if ( pt - > offset_max < realoff | | pt - > offset_min > realoff ) {
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
} else {
2018-12-03 12:40:13 -05:00
if ( mdata - > offset [ pt - > offset_min ] = = CLI_OFF_NONE | | mdata - > offset [ pt - > offset_max ] < realoff | | mdata - > offset [ pt - > offset_min ] > realoff ) {
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
}
}
2018-12-03 12:40:13 -05:00
if ( pt - > sigid ) { /* it's a partial signature */
2014-11-11 16:48:19 -05:00
/* if 2nd or later part, confirm some prior part has matched */
2018-12-03 12:40:13 -05:00
if ( pt - > partno ! = 1 & & ( ! mdata - > offmatrix [ pt - > sigid - 1 ] | | ! mdata - > offmatrix [ pt - > sigid - 1 ] [ pt - > partno - 2 ] [ 0 ] ) ) {
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
2022-06-04 12:08:51 -07:00
if ( ( uint32_t ) ( pt - > partno + 1 ) > mdata - > min_partno )
2014-11-11 16:48:19 -05:00
mdata - > min_partno = pt - > partno + 1 ;
/* sparsely populated matrix, so allocate and initialize if NULL */
2018-12-03 12:40:13 -05:00
if ( ! mdata - > offmatrix [ pt - > sigid - 1 ] ) {
2024-01-09 17:17:48 -05:00
mdata - > offmatrix [ pt - > sigid - 1 ] = malloc ( pt - > parts * sizeof ( int32_t * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! mdata - > offmatrix [ pt - > sigid - 1 ] ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u] \n " , pt - > sigid - 1 ) ;
return CL_EMEM ;
}
2024-01-09 17:17:48 -05:00
/* One contiguous allocation holds all rows; row j is addressed below
 * as row 0 plus j * (CLI_DEFAULT_AC_TRACKLEN + 2) elements. */
mdata - > offmatrix [ pt - > sigid - 1 ] [ 0 ] = malloc ( pt - > parts * ( CLI_DEFAULT_AC_TRACKLEN + 2 ) * sizeof ( uint32_t ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! mdata - > offmatrix [ pt - > sigid - 1 ] [ 0 ] ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0] \n " , pt - > sigid - 1 ) ;
free ( mdata - > offmatrix [ pt - > sigid - 1 ] ) ;
mdata - > offmatrix [ pt - > sigid - 1 ] = NULL ;
return CL_EMEM ;
}
2017-09-21 13:10:01 -04:00
memset ( mdata - > offmatrix [ pt - > sigid - 1 ] [ 0 ] , ( uint32_t ) - 1 , pt - > parts * ( CLI_DEFAULT_AC_TRACKLEN + 2 ) * sizeof ( uint32_t ) ) ;
2014-11-11 16:48:19 -05:00
mdata - > offmatrix [ pt - > sigid - 1 ] [ 0 ] [ 0 ] = 0 ;
2018-12-03 12:40:13 -05:00
for ( j = 1 ; j < pt - > parts ; j + + ) {
mdata - > offmatrix [ pt - > sigid - 1 ] [ j ] = mdata - > offmatrix [ pt - > sigid - 1 ] [ 0 ] + j * ( CLI_DEFAULT_AC_TRACKLEN + 2 ) ;
2014-11-11 16:48:19 -05:00
mdata - > offmatrix [ pt - > sigid - 1 ] [ j ] [ 0 ] = 0 ;
}
}
offmatrix = mdata - > offmatrix [ pt - > sigid - 1 ] ;
found = 0 ;
2018-12-03 12:40:13 -05:00
/* Search the offsets recorded for the previous part for one that lies
 * at or before realoff and satisfies this part's min/max distance;
 * found keeps the slot index of the hit, or 0. */
if ( pt - > partno ! = 1 ) {
for ( j = 1 ; ( j < = CLI_DEFAULT_AC_TRACKLEN + 1 ) & & ( offmatrix [ pt - > partno - 2 ] [ j ] ! = ( uint32_t ) - 1 ) ; j + + ) {
2014-11-11 16:48:19 -05:00
found = j ;
2018-12-03 12:40:13 -05:00
if ( realoff < offmatrix [ pt - > partno - 2 ] [ j ] )
2016-06-14 17:11:41 -04:00
found = 0 ;
2018-12-03 12:40:13 -05:00
if ( found & & pt - > maxdist )
if ( realoff - offmatrix [ pt - > partno - 2 ] [ j ] > pt - > maxdist )
2014-11-11 16:48:19 -05:00
found = 0 ;
2018-12-03 12:40:13 -05:00
if ( found & & pt - > mindist )
if ( realoff - offmatrix [ pt - > partno - 2 ] [ j ] < pt - > mindist )
2014-11-11 16:48:19 -05:00
found = 0 ;
2018-12-03 12:40:13 -05:00
if ( found )
2014-11-11 16:48:19 -05:00
break ;
}
}
2018-12-03 12:40:13 -05:00
/* When the second part chained to a first-part entry other than slot 1,
 * swap that entry into slot 1 (the slot read when the signature
 * completes). */
if ( pt - > partno = = 2 & & found > 1 ) {
swp = offmatrix [ 0 ] [ 1 ] ;
offmatrix [ 0 ] [ 1 ] = offmatrix [ 0 ] [ found ] ;
2014-11-11 16:48:19 -05:00
offmatrix [ 0 ] [ found ] = swp ;
2018-12-03 12:40:13 -05:00
if ( pt - > type ! = CL_TYPE_MSEXE ) {
swp = offmatrix [ pt - > parts - 1 ] [ 1 ] ;
offmatrix [ pt - > parts - 1 ] [ 1 ] = offmatrix [ pt - > parts - 1 ] [ found ] ;
2014-11-11 16:48:19 -05:00
offmatrix [ pt - > parts - 1 ] [ found ] = swp ;
}
}
2018-12-03 12:40:13 -05:00
/* First part, or a non-final part that chained: store the match end in
 * this part's row. Element 0 of the row is the index of the slot
 * written last. */
if ( pt - > partno = = 1 | | ( found & & ( pt - > partno ! = pt - > parts ) ) ) {
if ( offmatrix [ pt - > partno - 1 ] [ 0 ] = = CLI_DEFAULT_AC_TRACKLEN + 1 )
2014-11-11 16:48:19 -05:00
offmatrix [ pt - > partno - 1 ] [ 0 ] = 1 ; /* wrap, ends up at 2 */
offmatrix [ pt - > partno - 1 ] [ 0 ] + + ;
offmatrix [ pt - > partno - 1 ] [ offmatrix [ pt - > partno - 1 ] [ 0 ] ] = offset + matchend ;
2018-12-03 12:40:13 -05:00
if ( pt - > partno = = 1 ) /* save realoff for the first part */
2014-11-11 16:48:19 -05:00
offmatrix [ pt - > parts - 1 ] [ offmatrix [ pt - > partno - 1 ] [ 0 ] ] = realoff ;
2018-12-03 12:40:13 -05:00
} else if ( found & & pt - > partno = = pt - > parts ) {
if ( pt - > type ) {
2014-11-11 16:48:19 -05:00
2018-12-03 12:40:13 -05:00
if ( pt - > type = = CL_TYPE_IGNORED & & ( ! pt - > rtype | | ftype = = pt - > rtype ) )
2014-11-11 16:48:19 -05:00
return CL_TYPE_IGNORED ;
2022-07-28 16:03:34 -07:00
if ( ( pt - > type > type | | pt - > type > = CL_TYPE_SFX | | pt - > type = = CL_TYPE_MSEXE ) & &
( pt - > rtype = = CL_TYPE_ANY | | ftype = = pt - > rtype ) ) {
2014-11-11 16:48:19 -05:00
cli_dbgmsg ( " Matched signature for file type %s \n " , pt - > virname ) ;
type = pt - > type ;
Increase limit for finding PE files embedded in other PE files
I am seeing missed detections since we changed to prohibit embedded
file type identification when inside an embedded file.
In particular, I'm seeing this issue with PE files that contain multiple
other MSEXE as well as a variety of false positives for PE file headers.
For example, imagine a PE with four concatenated DLL's, like so:
```
[ EXE file | DLL #1 | DLL #2 | DLL #3 | DLL #4 ]
```
And note that false positives for embedded MSEXE files are fairly common.
So there may be a few mixed in there.
Before limiting embedded file identification we might interpret the file
structure something like this:
```
MSEXE: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: DLL #1: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: DLL #2: {
embedded MSEXE #1: DLL #3: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: false positive,
embedded MSEXE #5: DLL #4
}
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: false positive,
embedded MSEXE #5: false positive,
embedded MSEXE #6: DLL #4
}
embedded MSEXE #3: DLL #3,
embedded MSEXE #4: false positive,
embedded MSEXE #5: false positive,
embedded MSEXE #6: false positive,
embedded MSEXE #7: false positive,
embedded MSEXE #8: DLL #4
}
}
```
This is obviously terrible, which is why why we don't allow detecting
embedded files within other embedded files.
So after we enforce that limit, the same file may be interpreted like
this instead:
```
MSEXE: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: DLL #1,
embedded MSEXE #5: false positive,
embedded MSEXE #6: DLL #2,
embedded MSEXE #7: DLL #3,
embedded MSEXE #8: false positive,
embedded MSEXE #9: false positive,
embedded MSEXE #10: false positive,
embedded MSEXE #11: false positive,
embedded MSEXE #12: DLL #4
}
```
That's great! Except that we now exceed the "MAX_EMBEDDED_OBJ" limit
for embedded type matches (limit 10, but 12 found). That means we won't
see or extract the 4th DLL anymore.
My solution is to lift the limit when adding an matched MSEXE type.
We already do this for matched ZIPSFX types.
While doing this, I've significantly tidied up the limits checks to
make it more readble, and removed duplicate checks from within the
`ac_addtype()` function.
CLAM-2897
2025-10-12 16:05:17 -04:00
if ( ftoffset ! = NULL ) {
// Caller provided a pointer to record matched types.
bool too_many_types = false ;
bool supported_type = false ;
if ( * ftoffset ! = NULL ) {
// Have some type matches already. Check limits.
if ( ctx & & ( ( type = = CL_TYPE_ZIPSFX ) | |
( type = = CL_TYPE_MSEXE & & ftype = = CL_TYPE_MSEXE ) ) ) {
// When ctx present, limit the number of type matches using ctx->engine->maxfiles for specific types.
// Reasoning:
// ZIP local file header entries likely to be numerous if a single ZIP appended to the scanned file.
// MSEXE can contain many embedded MSEXE entries and MSEXE type false positives matches.
if ( ctx - > engine - > maxfiles = = 0 ) {
// Max-files limit is disabled.
} else if ( ( * ftoffset ) - > cnt > = ctx - > engine - > maxfiles ) {
if ( UNLIKELY ( cli_get_debug_flag ( ) ) ) {
cli_dbgmsg ( " ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached maxfiles limit of %u \n " , cli_ftname ( type ) , ( * ftoffset ) - > offset , ctx - > engine - > maxfiles ) ;
}
too_many_types = true ;
}
} else {
// Limit the number of type matches using MAX_EMBEDDED_OBJ.
if ( ( * ftoffset ) - > cnt > = MAX_EMBEDDED_OBJ ) {
if ( UNLIKELY ( cli_get_debug_flag ( ) ) ) {
cli_dbgmsg ( " ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached MAX_EMBEDDED_OBJ limit of %u \n " , cli_ftname ( type ) , ( * ftoffset ) - > offset , MAX_EMBEDDED_OBJ ) ;
}
too_many_types = true ;
}
}
}
// Filter to supported types.
if (
// Found type is MBR.
type = = CL_TYPE_MBR | |
// Found type is any SFX type (i.e., ZIPSFX, RARSFX, 7ZSSFX, etc.).
type > = CL_TYPE_SFX | |
// Found type is an MSEXE, but only if host file type is one of MSEXE, ZIP, or MSOLE2.
( type = = CL_TYPE_MSEXE & & ( ftype = = CL_TYPE_MSEXE | | ftype = = CL_TYPE_ZIP | | ftype = = CL_TYPE_MSOLE2 ) ) ) {
supported_type = true ;
}
if ( supported_type & & ! too_many_types ) {
/* FIXME: the first offset in the array is most likely the correct one but
* it may happen it is not
* Until we ' re certain and can fix this , we add all offsets in the list .
*/
for ( j = 1 ; j < = CLI_DEFAULT_AC_TRACKLEN + 1 & & offmatrix [ 0 ] [ j ] ! = ( uint32_t ) - 1 ; j + + ) {
ret = ac_addtype ( ftoffset , type , offmatrix [ pt - > parts - 1 ] [ j ] , ctx ) ;
if ( CL_SUCCESS ! = ret ) {
return ret ;
}
}
}
2014-11-11 16:48:19 -05:00
}
2017-09-21 13:10:01 -04:00
/* Reset every row to the "unused" pattern (all bytes 0xff) and zero the
 * per-row write index. */
memset ( offmatrix [ 0 ] , ( uint32_t ) - 1 , pt - > parts * ( CLI_DEFAULT_AC_TRACKLEN + 2 ) * sizeof ( uint32_t ) ) ;
2018-12-03 12:40:13 -05:00
for ( j = 0 ; j < pt - > parts ; j + + )
2014-11-11 16:48:19 -05:00
offmatrix [ j ] [ 0 ] = 0 ;
}
} else { /* !pt->type */
2018-12-03 12:40:13 -05:00
if ( pt - > lsigid [ 0 ] ) {
2015-03-30 17:12:01 -04:00
rc = lsig_sub_matched ( root , mdata , pt - > lsigid [ 1 ] , pt - > lsigid [ 2 ] , offmatrix [ pt - > parts - 1 ] [ 1 ] , 1 ) ;
if ( rc ! = CL_SUCCESS )
return rc ;
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
2018-12-03 12:40:13 -05:00
/* With a result list the match is recorded and scanning continues. */
if ( res ) {
newres = ( struct cli_ac_result * ) malloc ( sizeof ( struct cli_ac_result ) ) ;
if ( ! newres ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_scanbuff: Can't allocate memory for newres %lu \n " , ( unsigned long ) sizeof ( struct cli_ac_result ) ) ;
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
newres - > virname = pt - > virname ;
2014-11-11 16:48:19 -05:00
newres - > customdata = pt - > customdata ;
2018-12-03 12:40:13 -05:00
newres - > next = * res ;
newres - > offset = ( off_t ) offmatrix [ pt - > parts - 1 ] [ 1 ] ;
* res = newres ;
2014-11-11 16:48:19 -05:00
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
} else {
2018-12-03 12:40:13 -05:00
/* Without a result list: in all-match mode record the detection and
 * keep going, otherwise return CL_VIRUS right away. */
if ( ctx & & SCAN_ALLMATCHES ) {
2020-02-10 16:14:47 -05:00
ret = cli_append_virus ( ctx , ( const char * ) pt - > virname ) ;
2021-06-18 12:56:38 -07:00
if ( ret = = CL_VIRUS ) {
2020-02-10 16:14:47 -05:00
viruses_found = 1 ;
}
2014-11-11 16:48:19 -05:00
}
if ( virname )
* virname = pt - > virname ;
2018-12-03 12:40:13 -05:00
if ( customdata )
2014-11-11 16:48:19 -05:00
* customdata = pt - > customdata ;
2018-07-20 22:28:48 -04:00
if ( ! ctx | | ! SCAN_ALLMATCHES )
2014-11-11 16:48:19 -05:00
return CL_VIRUS ;
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
}
}
} else { /* old type signature */
2018-12-03 12:40:13 -05:00
if ( pt - > type ) {
2022-07-28 16:03:34 -07:00
if ( pt - > type = = CL_TYPE_IGNORED & & ( pt - > rtype = = CL_TYPE_ANY | | ftype = = pt - > rtype ) )
2014-11-11 16:48:19 -05:00
return CL_TYPE_IGNORED ;
2022-07-28 16:03:34 -07:00
if ( ( pt - > type > type | | pt - > type > = CL_TYPE_SFX | | pt - > type = = CL_TYPE_MSEXE ) & &
( pt - > rtype = = CL_TYPE_ANY | | ftype = = pt - > rtype ) ) {
2014-11-11 16:48:19 -05:00
cli_dbgmsg ( " Matched signature for file type %s at %u \n " , pt - > virname , realoff ) ;
type = pt - > type ;
Increase limit for finding PE files embedded in other PE files
I am seeing missed detections since we changed to prohibit embedded
file type identification when inside an embedded file.
In particular, I'm seeing this issue with PE files that contain multiple
other MSEXE as well as a variety of false positives for PE file headers.
For example, imagine a PE with four concatenated DLL's, like so:
```
[ EXE file | DLL #1 | DLL #2 | DLL #3 | DLL #4 ]
```
And note that false positives for embedded MSEXE files are fairly common.
So there may be a few mixed in there.
Before limiting embedded file identification we might interpret the file
structure something like this:
```
MSEXE: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: DLL #1: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: DLL #2: {
embedded MSEXE #1: DLL #3: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: false positive,
embedded MSEXE #5: DLL #4
}
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: false positive,
embedded MSEXE #5: false positive,
embedded MSEXE #6: DLL #4
}
embedded MSEXE #3: DLL #3,
embedded MSEXE #4: false positive,
embedded MSEXE #5: false positive,
embedded MSEXE #6: false positive,
embedded MSEXE #7: false positive,
embedded MSEXE #8: DLL #4
}
}
```
This is obviously terrible, which is why why we don't allow detecting
embedded files within other embedded files.
So after we enforce that limit, the same file may be interpreted like
this instead:
```
MSEXE: {
embedded MSEXE #1: false positive,
embedded MSEXE #2: false positive,
embedded MSEXE #3: false positive,
embedded MSEXE #4: DLL #1,
embedded MSEXE #5: false positive,
embedded MSEXE #6: DLL #2,
embedded MSEXE #7: DLL #3,
embedded MSEXE #8: false positive,
embedded MSEXE #9: false positive,
embedded MSEXE #10: false positive,
embedded MSEXE #11: false positive,
embedded MSEXE #12: DLL #4
}
```
That's great! Except that we now exceed the "MAX_EMBEDDED_OBJ" limit
for embedded type matches (limit 10, but 12 found). That means we won't
see or extract the 4th DLL anymore.
My solution is to lift the limit when adding an matched MSEXE type.
We already do this for matched ZIPSFX types.
While doing this, I've significantly tidied up the limits checks to
make it more readble, and removed duplicate checks from within the
`ac_addtype()` function.
CLAM-2897
2025-10-12 16:05:17 -04:00
if ( ftoffset ! = NULL ) {
// Caller provided a pointer to record matched types.
bool too_many_types = false ;
bool supported_type = false ;
if ( * ftoffset ! = NULL ) {
// Have some type matches already. Check limits.
if ( ctx & & ( ( type = = CL_TYPE_ZIPSFX ) | |
( type = = CL_TYPE_MSEXE & & ftype = = CL_TYPE_MSEXE ) ) ) {
// When ctx present, limit the number of type matches using ctx->engine->maxfiles for specific types.
// Reasoning:
// ZIP local file header entries likely to be numerous if a single ZIP appended to the scanned file.
// MSEXE can contain many embedded MSEXE entries and MSEXE type false positives matches.
if ( ctx - > engine - > maxfiles = = 0 ) {
// Max-files limit is disabled.
} else if ( ( * ftoffset ) - > cnt > = ctx - > engine - > maxfiles ) {
if ( UNLIKELY ( cli_get_debug_flag ( ) ) ) {
cli_dbgmsg ( " ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached maxfiles limit of %u \n " , cli_ftname ( type ) , ( * ftoffset ) - > offset , ctx - > engine - > maxfiles ) ;
}
too_many_types = true ;
}
} else {
// Limit the number of type matches using MAX_EMBEDDED_OBJ.
if ( ( * ftoffset ) - > cnt > = MAX_EMBEDDED_OBJ ) {
if ( UNLIKELY ( cli_get_debug_flag ( ) ) ) {
cli_dbgmsg ( " ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached MAX_EMBEDDED_OBJ limit of %u \n " , cli_ftname ( type ) , ( * ftoffset ) - > offset , MAX_EMBEDDED_OBJ ) ;
}
too_many_types = true ;
}
}
}
// Filter to supported types.
if (
// Found type is MBR.
type = = CL_TYPE_MBR | |
// Found type is any SFX type (i.e., ZIPSFX, RARSFX, 7ZSSFX, etc.).
type > = CL_TYPE_SFX | |
// Found type is an MSEXE, but only if host file type is one of MSEXE, ZIP, or MSOLE2.
( type = = CL_TYPE_MSEXE & & ( ftype = = CL_TYPE_MSEXE | | ftype = = CL_TYPE_ZIP | | ftype = = CL_TYPE_MSOLE2 ) ) ) {
supported_type = true ;
}
if ( supported_type & & ! too_many_types ) {
ret = ac_addtype ( ftoffset , type , realoff , ctx ) ;
if ( CL_SUCCESS ! = ret ) {
return ret ;
}
}
2014-11-11 16:48:19 -05:00
}
}
} else {
2018-12-03 12:40:13 -05:00
if ( pt - > lsigid [ 0 ] ) {
2015-03-30 17:12:01 -04:00
rc = lsig_sub_matched ( root , mdata , pt - > lsigid [ 1 ] , pt - > lsigid [ 2 ] , realoff , 0 ) ;
if ( rc ! = CL_SUCCESS )
return rc ;
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
2018-12-03 12:40:13 -05:00
/* Same reporting scheme as for multi-part signatures above, using
 * realoff as the reported offset. */
if ( res ) {
newres = ( struct cli_ac_result * ) malloc ( sizeof ( struct cli_ac_result ) ) ;
if ( ! newres ) {
2014-11-11 16:48:19 -05:00
cli_errmsg ( " cli_ac_scanbuff: Can't allocate memory for newres %lu \n " , ( unsigned long ) sizeof ( struct cli_ac_result ) ) ;
return CL_EMEM ;
}
2018-12-03 12:40:13 -05:00
newres - > virname = pt - > virname ;
2014-11-11 16:48:19 -05:00
newres - > customdata = pt - > customdata ;
2018-12-03 12:40:13 -05:00
newres - > offset = ( off_t ) realoff ;
newres - > next = * res ;
* res = newres ;
2014-11-11 16:48:19 -05:00
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
} else {
2018-12-03 12:40:13 -05:00
if ( ctx & & SCAN_ALLMATCHES ) {
2020-02-10 16:14:47 -05:00
ret = cli_append_virus ( ctx , ( const char * ) pt - > virname ) ;
2021-06-18 12:56:38 -07:00
if ( ret = = CL_VIRUS ) {
2020-02-10 16:14:47 -05:00
viruses_found = 1 ;
}
2014-11-11 16:48:19 -05:00
}
if ( virname )
* virname = pt - > virname ;
2018-12-03 12:40:13 -05:00
if ( customdata )
2014-11-11 16:48:19 -05:00
* customdata = pt - > customdata ;
2018-07-20 22:28:48 -04:00
if ( ! ctx | | ! SCAN_ALLMATCHES )
2014-11-11 16:48:19 -05:00
return CL_VIRUS ;
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
continue ;
}
}
}
2015-02-09 19:28:39 -08:00
ptN = ptN - > next_same ;
2014-11-11 16:48:19 -05:00
}
}
2015-02-09 19:28:39 -08:00
pattN = pattN - > next ;
2014-11-11 16:48:19 -05:00
}
}
2004-07-19 17:54:40 +00:00
}
2013-06-13 15:01:39 -04:00
/* Detections made in all-match mode were only recorded above; report them now. */
if ( viruses_found )
2014-11-11 16:48:19 -05:00
return CL_VIRUS ;
2008-02-20 22:04:48 +00:00
return ( mode & AC_SCAN_FT ) ? type : CL_CLEAN ;
2004-07-19 17:54:40 +00:00
}
2007-04-28 18:40:59 +00:00
2015-05-21 15:04:22 -04:00
/**
 * @brief Comparator ordering two single bytes by their unsigned value.
 *
 * @param a  Pointer to the first byte.
 * @param b  Pointer to the second byte.
 * @return Difference (first - second) of the two bytes read as unsigned char:
 *         negative, zero or positive.
 */
static int qcompare_byte(const void *a, const void *b)
{
    const unsigned char lhs = *(const unsigned char *)a;
    const unsigned char rhs = *(const unsigned char *)b;

    /* Both operands are promoted to int, so the subtraction cannot wrap. */
    return lhs - rhs;
}
2015-05-21 15:04:22 -04:00
/**
 * @brief Comparator for fixed-length byte strings stored as an array of pointers.
 *
 * @param arg  Pointer to a uint16_t holding the number of bytes to compare.
 * @param a    Pointer to the first element, itself a pointer to the bytes.
 * @param b    Pointer to the second element, itself a pointer to the bytes.
 * @return Result of memcmp() over the first *arg bytes of both strings.
 */
static int qcompare_fstr(const void *arg, const void *a, const void *b)
{
    const uint16_t nbytes            = *(const uint16_t *)arg;
    const unsigned char *const *lhs = (const unsigned char *const *)a;
    const unsigned char *const *rhs = (const unsigned char *const *)b;

    return memcmp(*lhs, *rhs, nbytes);
}
2015-05-11 11:55:43 -04:00
/**
 * @brief Find the ')' that closes the expression `hexstr` is located in.
 *
 * `hexstr` is expected to point just AFTER the opening parenthesis. Nested
 * "(...)" groups are skipped: the scan stops at the first ')' that has no
 * matching '(' inside `hexstr`.
 *
 * @param hexstr  NUL-terminated string to scan.
 * @param end     Output; set to the matching ')' or to NULL when the string
 *                ends before one is found.
 * @return Number of '(' characters seen before the scan stopped. This is the
 *         count of nested groups opened, not the maximum nesting depth; 0
 *         means the expression contains no nested group.
 */
inline static size_t find_paren_end(char *hexstr, char **end)
{
    size_t nest  = 0; /* every '(' seen so far */
    size_t level = 0; /* nested groups currently open */
    char *cur;

    *end = NULL;

    /* Walk up to the terminator instead of re-running strlen() on every
     * iteration, which made the scan quadratic in the string length. */
    for (cur = hexstr; *cur != '\0'; cur++) {
        if (*cur == '(') {
            nest++;
            level++;
        } else if (*cur == ')') {
            if (level == 0) {
                *end = cur;
                break;
            }
            level--;
        }
    }

    return nest;
}
2015-05-13 17:48:22 -04:00
/*
 * Fold the length of a finished alternative into the running summary.
 *
 * *slen tracks the longest alternative seen so far and *flen is cleared as
 * soon as two alternatives differ in length. Applying the same length twice
 * in a row gives the same result as applying it once.
 *
 * NOTE: *slen == 0 doubles as "nothing recorded yet", so a zero-length
 * alternative is not counted as a length mismatch.
 */
inline static void ac_analyze_expr_fold_len(int len, int *flen, int *slen)
{
    if (!*slen) {
        *slen = len;
    } else if (len != *slen) {
        *flen = 0;
        if (len > *slen)
            *slen = len;
    }
}

/**
 * @brief Analyze an alternation expression of the form "alt|alt|...".
 *
 * Scanning runs to the end of the string or to the first ')' that closes the
 * enclosing expression, whichever comes first; the parentheses are allowed to
 * be unbalanced. Only '|' characters outside of nested groups separate
 * alternatives; everything inside a nested "(...)" group, including its
 * parentheses, counts towards the length of the alternative containing it.
 * Lengths are measured in characters of `hexstr`.
 *
 * @param hexstr     NUL-terminated expression, starting at its first alternative.
 * @param fixed_len  Optional output: 1 when all alternatives have the same
 *                   length and the expression contains neither '(' nor '?',
 *                   otherwise 0.
 * @param sub_len    Optional output: length of the longest alternative.
 * @return Number of alternatives; counting starts at 1 for the first one.
 */
inline static int ac_analyze_expr(char *hexstr, int *fixed_len, int *sub_len)
{
    const char *cur;
    int level = 0, len = 0, numexpr = 1;
    int flen = 1, slen = 0;

    /* Iterate up to the terminator; calling strlen() in the loop condition
     * rescanned the whole string for every character. */
    for (cur = hexstr; *cur != '\0'; cur++) {
        if (*cur == '(') {
            flen = 0;
            level++;
        } else if (*cur == ')') {
            if (!level) {
                /* End of the enclosing expression; the last alternative is
                 * folded in once, after the loop. */
                break;
            }
            level--;
        }

        if (!level && *cur == '|') {
            ac_analyze_expr_fold_len(len, &flen, &slen);
            len = 0;
            numexpr++;
        } else {
            if (*cur == '?')
                flen = 0;
            len++;
        }
    }

    /* Account for the final (or only) alternative. */
    ac_analyze_expr_fold_len(len, &flen, &slen);

    if (sub_len)
        *sub_len = slen;
    if (fixed_len)
        *fixed_len = flen;

    return numexpr;
}
2015-05-21 18:46:34 -04:00
/**
 * @brief Compare two pattern strings element by element, honouring wildcards.
 *
 * Only the first MIN(alen, blen) elements are examined. At each position the
 * wildcard class of both sides (element & CLI_MATCH_WILDCARD) is determined
 * and the bits that are not hidden by a wildcard are compared.
 *
 * @param a     First pattern string.
 * @param alen  Number of elements in `a`.
 * @param b     Second pattern string.
 * @param blen  Number of elements in `b`.
 * @param wild  Optional output, written only when 0 is returned. Bit value 1
 *              is set when `a` carried the covering wildcard at some position,
 *              bit value 2 when `b` did; 0 means no such position was found.
 * @return 0 when no examined position tells the two strings apart. Otherwise a
 *         non-zero value: the difference of the compared bits (b minus a), 1
 *         or -1 for an unhandled wildcard class, or 1 as soon as both sides
 *         have carried a wildcard covering the other.
 */
inline static int ac_uicmp ( uint16_t * a , size_t alen , uint16_t * b , size_t blen , int * wild )
{
2017-09-21 13:10:01 -04:00
uint16_t awild , bwild , side_wild ;
2015-05-21 18:46:34 -04:00
size_t i , minlen = MIN ( alen , blen ) ;
side_wild = 0 ;
for ( i = 0 ; i < minlen ; i + + ) {
awild = a [ i ] & CLI_MATCH_WILDCARD ;
bwild = b [ i ] & CLI_MATCH_WILDCARD ;
/* Same wildcard class on both sides: compare only the bits that class keeps. */
if ( awild = = bwild ) {
switch ( awild ) {
2018-12-03 12:40:13 -05:00
case CLI_MATCH_CHAR :
if ( ( a [ i ] & 0xff ) ! = ( b [ i ] & 0xff ) ) {
return ( b [ i ] & 0xff ) - ( a [ i ] & 0xff ) ;
}
break ;
2015-05-21 18:46:34 -04:00
case CLI_MATCH_IGNORE :
break ;
case CLI_MATCH_NIBBLE_HIGH :
if ( ( a [ i ] & 0xf0 ) ! = ( b [ i ] & 0xf0 ) ) {
2018-12-03 12:40:13 -05:00
return ( b [ i ] & 0xf0 ) - ( a [ i ] & 0xf0 ) ;
2015-05-21 18:46:34 -04:00
}
break ;
case CLI_MATCH_NIBBLE_LOW :
if ( ( a [ i ] & 0x0f ) ! = ( b [ i ] & 0x0f ) ) {
2018-12-03 12:40:13 -05:00
return ( b [ i ] & 0x0f ) - ( a [ i ] & 0x0f ) ;
2015-05-21 18:46:34 -04:00
}
break ;
default :
cli_errmsg ( " ac_uicmp: unhandled wildcard type \n " ) ;
2018-12-03 12:40:13 -05:00
return 1 ;
}
} else { /* not identical wildcard types */
/* A mismatch in the bits b's wildcard keeps returns non-zero; otherwise
 * b covers a at this position (side_wild bit value 2). */
if ( awild = = CLI_MATCH_CHAR ) { /* b is only wild */
switch ( bwild ) {
case CLI_MATCH_IGNORE :
side_wild | = 2 ;
break ;
case CLI_MATCH_NIBBLE_HIGH :
if ( ( a [ i ] & 0xf0 ) ! = ( b [ i ] & 0xf0 ) ) {
return ( b [ i ] & 0xf0 ) - ( a [ i ] & 0xff ) ;
}
side_wild | = 2 ;
break ;
case CLI_MATCH_NIBBLE_LOW :
if ( ( a [ i ] & 0x0f ) ! = ( b [ i ] & 0x0f ) ) {
return ( b [ i ] & 0x0f ) - ( a [ i ] & 0xff ) ;
}
side_wild | = 2 ;
break ;
default :
cli_errmsg ( " ac_uicmp: unhandled wildcard type \n " ) ;
return - 1 ;
2015-05-21 18:46:34 -04:00
}
/* Mirror image of the branch above: a covers b (side_wild bit value 1). */
} else if ( bwild = = CLI_MATCH_CHAR ) { /* a is only wild */
switch ( awild ) {
2018-12-03 12:40:13 -05:00
case CLI_MATCH_IGNORE :
side_wild | = 1 ;
break ;
case CLI_MATCH_NIBBLE_HIGH :
if ( ( a [ i ] & 0xf0 ) ! = ( b [ i ] & 0xf0 ) ) {
return ( b [ i ] & 0xff ) - ( a [ i ] & 0xf0 ) ;
}
side_wild | = 1 ;
break ;
case CLI_MATCH_NIBBLE_LOW :
if ( ( a [ i ] & 0x0f ) ! = ( b [ i ] & 0x0f ) ) {
return ( b [ i ] & 0xff ) - ( a [ i ] & 0x0f ) ;
}
side_wild | = 1 ;
break ;
default :
cli_errmsg ( " ac_uicmp: unhandled wild typing \n " ) ;
return 1 ;
2015-05-21 18:46:34 -04:00
}
} else { /* not identical, both wildcards */
/* The side holding CLI_MATCH_IGNORE covers the other side's nibble wildcard. */
if ( awild = = CLI_MATCH_IGNORE | | bwild = = CLI_MATCH_IGNORE ) {
if ( awild = = CLI_MATCH_IGNORE ) {
side_wild | = 1 ;
2018-12-03 12:40:13 -05:00
} else if ( bwild = = CLI_MATCH_IGNORE ) {
2015-05-21 18:46:34 -04:00
side_wild | = 2 ;
}
} else {
/* only high and low nibbles should be left here */
side_wild | = 3 ;
}
}
}
/* both sides contain a wildcard that contains the other, therefore unique by wildcards */
if ( side_wild = = 3 )
return 1 ;
}
/* Reached only when no position produced a difference; *wild is left
 * untouched on every non-zero return above. */
if ( wild )
* wild = side_wild ;
return 0 ;
}
2015-05-13 17:48:22 -04:00
/* add new generic alternate node to special */
2015-05-19 12:04:59 -04:00
inline static int ac_addspecial_add_alt_node ( const char * subexpr , uint8_t sigopts , struct cli_ac_special * special , struct cli_matcher * root )
2015-05-11 11:55:43 -04:00
{
2022-09-30 10:43:55 -07:00
struct cli_alt_node * newnode = NULL ;
struct cli_alt_node * * prev = NULL ;
struct cli_alt_node * ins = NULL ;
uint16_t * s = NULL ;
int i = 0 ;
int cmp = 0 ;
int wild = 0 ;
2015-05-13 17:48:22 -04:00
2019-05-03 18:16:03 -04:00
# ifndef USE_MPOOL
UNUSEDPARAM ( root ) ;
# endif
newnode = ( struct cli_alt_node * ) MPOOL_CALLOC ( root - > mempool , 1 , sizeof ( struct cli_alt_node ) ) ;
2015-05-13 17:48:22 -04:00
if ( ! newnode ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " ac_addspecial_add_alt_node: Can't allocate new alternate node \n " ) ;
return CL_EMEM ;
2015-05-13 17:48:22 -04:00
}
2019-05-03 18:16:03 -04:00
s = CLI_MPOOL_HEX2UI ( root - > mempool , subexpr ) ;
2015-05-18 09:59:04 -04:00
if ( ! s ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newnode ) ;
2015-05-14 12:23:56 -04:00
return CL_EMALFDB ;
2015-05-13 17:48:22 -04:00
}
2018-12-03 12:40:13 -05:00
newnode - > str = s ;
newnode - > len = ( uint16_t ) strlen ( subexpr ) / 2 ;
2015-05-21 18:46:34 -04:00
newnode - > unique = 1 ;
2015-05-13 17:48:22 -04:00
2015-05-19 12:04:59 -04:00
/* setting nocase match */
if ( sigopts & ACPATT_OPTION_NOCASE ) {
for ( i = 0 ; i < newnode - > len ; + + i )
if ( ( newnode - > str [ i ] & CLI_MATCH_METADATA ) = = CLI_MATCH_CHAR ) {
2019-08-22 16:51:01 -04:00
newnode - > str [ i ] = CLI_NOCASE ( newnode - > str [ i ] & 0xff ) ;
2015-05-19 12:04:59 -04:00
newnode - > str [ i ] + = CLI_MATCH_NOCASE ;
}
}
2015-05-21 18:46:34 -04:00
/* search for uniqueness, TODO: directed acyclic word graph */
2015-05-13 17:48:22 -04:00
prev = & ( ( special - > alt ) . v_str ) ;
2018-12-03 12:40:13 -05:00
ins = ( special - > alt ) . v_str ;
2015-05-13 17:48:22 -04:00
while ( ins ) {
2015-05-21 18:46:34 -04:00
cmp = ac_uicmp ( ins - > str , ins - > len , newnode - > str , newnode - > len , & wild ) ;
2015-05-22 10:51:48 -04:00
if ( cmp = = 0 ) {
if ( newnode - > len ! = ins - > len ) { /* derivative */
2015-05-21 18:46:34 -04:00
newnode - > unique = 0 ;
2018-12-03 12:40:13 -05:00
ins - > unique = 0 ;
2015-05-22 10:51:48 -04:00
} else if ( wild = = 0 ) { /* duplicate */
2022-04-19 18:46:27 -04:00
MPOOL_FREE ( root - > mempool , newnode - > str ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newnode ) ;
2015-05-14 12:23:56 -04:00
return CL_SUCCESS ;
}
2015-05-21 18:46:34 -04:00
} /* TODO - possible sorting of altstr uniques and derivative groups? */
2015-05-14 12:23:56 -04:00
prev = & ( ins - > next ) ;
2018-12-03 12:40:13 -05:00
ins = ins - > next ;
2015-05-11 11:55:43 -04:00
}
2018-12-03 12:40:13 -05:00
* prev = newnode ;
2015-05-13 17:48:22 -04:00
newnode - > next = ins ;
2015-07-01 16:53:07 -04:00
if ( ( special - > num = = 0 ) | | ( newnode - > len < special - > len [ 0 ] ) )
special - > len [ 0 ] = newnode - > len ;
if ( ( special - > num = = 0 ) | | ( newnode - > len > special - > len [ 1 ] ) )
special - > len [ 1 ] = newnode - > len ;
2015-05-13 17:48:22 -04:00
special - > num + + ;
return CL_SUCCESS ;
}
2015-05-11 11:55:43 -04:00
2015-05-13 17:48:22 -04:00
/* recursive special handler for expanding and adding generic alternates */
2015-05-19 12:04:59 -04:00
static int ac_special_altexpand ( char * hexpr , char * subexpr , uint16_t maxlen , int lvl , int maxlvl , uint8_t sigopts , struct cli_ac_special * special , struct cli_matcher * root )
2015-05-13 17:48:22 -04:00
{
int ret , scnt = 0 , numexpr ;
char * ept , * sexpr , * end , term ;
char * fp ;
ept = sexpr = hexpr ;
2018-12-03 12:40:13 -05:00
fp = subexpr + strlen ( subexpr ) ;
2015-05-13 17:48:22 -04:00
numexpr = ac_analyze_expr ( hexpr , NULL , NULL ) ;
/* while there are expressions to resolve */
while ( scnt < numexpr ) {
2015-05-14 12:23:56 -04:00
scnt + + ;
while ( ( * ept ! = ' ( ' ) & & ( * ept ! = ' | ' ) & & ( * ept ! = ' ) ' ) & & ( * ept ! = ' \0 ' ) )
ept + + ;
/* check for invalid negation */
term = * ept ;
2018-12-03 12:40:13 -05:00
if ( ( * ept = = ' ( ' ) & & ( ept > = hexpr + 1 ) ) {
2015-05-14 12:23:56 -04:00
if ( ept [ - 1 ] = = ' ! ' ) {
cli_errmsg ( " ac_special_altexpand: Generic alternates cannot contain negations \n " ) ;
return CL_EMALFDB ;
}
}
/* appended token */
* ept = 0 ;
if ( cli_strlcat ( subexpr , sexpr , maxlen ) > = maxlen ) {
cli_errmsg ( " ac_special_altexpand: Unexpected expression larger than expected \n " ) ;
return CL_EMEM ;
}
2015-05-22 10:51:48 -04:00
* ept + + = term ;
2018-12-03 12:40:13 -05:00
sexpr = ept ;
2015-05-14 12:23:56 -04:00
if ( term = = ' | ' ) {
if ( lvl = = 0 ) {
2015-05-19 12:04:59 -04:00
if ( ( ret = ac_addspecial_add_alt_node ( subexpr , sigopts , special , root ) ) ! = CL_SUCCESS )
2015-05-14 12:23:56 -04:00
return ret ;
} else {
find_paren_end ( ept , & end ) ;
if ( ! end ) {
cli_errmsg ( " ac_special_altexpand: Missing closing parenthesis \n " ) ;
return CL_EMALFDB ;
}
end + + ;
2018-12-03 12:40:13 -05:00
if ( ( ret = ac_special_altexpand ( end , subexpr , maxlen , lvl - 1 , lvl , sigopts , special , root ) ) ! = CL_SUCCESS )
2015-05-14 12:23:56 -04:00
return ret ;
}
* fp = 0 ;
} else if ( term = = ' ) ' ) {
if ( lvl = = 0 ) {
cli_errmsg ( " ac_special_altexpand: Unexpected closing parenthesis \n " ) ;
return CL_EPARSE ;
}
2018-12-03 12:40:13 -05:00
if ( ( ret = ac_special_altexpand ( ept , subexpr , maxlen , lvl - 1 , lvl , sigopts , special , root ) ) ! = CL_SUCCESS )
2015-05-14 12:23:56 -04:00
return ret ;
break ;
} else if ( term = = ' ( ' ) {
int inner , found ;
find_paren_end ( ept , & end ) ;
if ( ! end ) {
cli_errmsg ( " ac_special_altexpand: Missing closing parenthesis \n " ) ;
return CL_EMALFDB ;
}
end + + ;
2018-12-03 12:40:13 -05:00
if ( ( ret = ac_special_altexpand ( ept , subexpr , maxlen , lvl + 1 , lvl + 1 , sigopts , special , root ) ) ! = CL_SUCCESS )
2015-05-14 12:23:56 -04:00
return ret ;
2015-05-22 10:51:48 -04:00
/* move ept to end of current alternate expression (recursive call already populates them) */
2018-12-03 12:40:13 -05:00
ept = end ;
2015-05-14 12:23:56 -04:00
inner = 0 ;
found = 0 ;
while ( ! found & & * ept ! = ' \0 ' ) {
2018-12-03 12:40:13 -05:00
switch ( * ept ) {
case ' | ' :
if ( ! inner )
found = 1 ;
break ;
case ' ( ' :
inner + + ;
break ;
case ' ) ' :
inner - - ;
break ;
2015-05-14 12:23:56 -04:00
}
ept + + ;
}
if ( * ept = = ' | ' )
ept + + ;
sexpr = ept ;
2018-12-03 12:40:13 -05:00
* fp = 0 ;
2015-05-14 12:23:56 -04:00
} else if ( term = = ' \0 ' ) {
2015-05-19 12:04:59 -04:00
if ( ( ret = ac_addspecial_add_alt_node ( subexpr , sigopts , special , root ) ) ! = CL_SUCCESS )
2015-05-14 12:23:56 -04:00
return ret ;
break ;
}
if ( lvl ! = maxlvl )
return CL_SUCCESS ;
2015-05-11 11:55:43 -04:00
}
2015-05-13 17:48:22 -04:00
if ( scnt ! = numexpr ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " ac_addspecial: Mismatch in parsed and expected signature \n " ) ;
return CL_EMALFDB ;
2015-05-13 17:48:22 -04:00
}
2015-05-11 11:55:43 -04:00
return CL_SUCCESS ;
}
2015-05-13 17:48:22 -04:00
/* alternate string specials (so many specials!) */
2015-05-19 12:04:59 -04:00
inline static int ac_special_altstr ( const char * hexpr , uint8_t sigopts , struct cli_ac_special * special , struct cli_matcher * root )
2015-05-11 11:55:43 -04:00
{
2015-05-13 17:48:22 -04:00
char * hexprcpy , * h , * c ;
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │ └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │ └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
| └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │ └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │ └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │ └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │ └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │ └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │ └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │ └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
| └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │ └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │ └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
| └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
int i , ret , num , fixed , slen ;
2015-05-13 17:48:22 -04:00
2024-01-09 17:44:33 -05:00
if ( ! ( hexprcpy = cli_safer_strdup ( hexpr ) ) ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " ac_special_altstr: Can't duplicate alternate expression \n " ) ;
return CL_EDUP ;
2015-05-11 11:55:43 -04:00
}
2015-05-13 17:48:22 -04:00
num = ac_analyze_expr ( hexprcpy , & fixed , & slen ) ;
2015-05-11 11:55:43 -04:00
2015-05-19 12:04:59 -04:00
if ( ! sigopts & & fixed ) {
2018-12-03 12:40:13 -05:00
special - > num = 0 ;
2015-07-01 16:53:07 -04:00
special - > len [ 0 ] = special - > len [ 1 ] = slen / 2 ;
2015-05-14 12:23:56 -04:00
/* single-bytes are len 2 in hex */
if ( slen = = 2 ) {
2018-12-03 12:40:13 -05:00
special - > type = AC_SPECIAL_ALT_CHAR ;
2019-05-03 18:16:03 -04:00
( special - > alt ) . byte = ( unsigned char * ) MPOOL_MALLOC ( root - > mempool , num ) ;
2015-05-14 12:23:56 -04:00
if ( ! ( ( special - > alt ) . byte ) ) {
cli_errmsg ( " cli_ac_special_altstr: Can't allocate newspecial->str \n " ) ;
free ( hexprcpy ) ;
return CL_EMEM ;
}
} else {
2018-12-03 12:40:13 -05:00
special - > type = AC_SPECIAL_ALT_STR_FIXED ;
2019-05-03 18:16:03 -04:00
( special - > alt ) . f_str = ( unsigned char * * ) MPOOL_MALLOC ( root - > mempool , num * sizeof ( unsigned char * ) ) ;
2015-05-14 12:23:56 -04:00
if ( ! ( ( special - > alt ) . f_str ) ) {
cli_errmsg ( " cli_ac_special_altstr: Can't allocate newspecial->str \n " ) ;
free ( hexprcpy ) ;
return CL_EMEM ;
}
}
for ( i = 0 ; i < num ; i + + ) {
if ( num = = 1 ) {
2019-05-03 18:16:03 -04:00
c = CLI_MPOOL_HEX2STR ( root - > mempool , hexprcpy ) ;
2015-05-14 12:23:56 -04:00
} else {
2018-12-03 12:40:13 -05:00
if ( ! ( h = cli_strtok ( hexprcpy , i , " | " ) ) ) {
2015-05-14 12:23:56 -04:00
free ( hexprcpy ) ;
return CL_EMEM ;
}
2019-05-03 18:16:03 -04:00
c = CLI_MPOOL_HEX2STR ( root - > mempool , h ) ;
2015-05-14 12:23:56 -04:00
free ( h ) ;
}
if ( ! c ) {
free ( hexprcpy ) ;
return CL_EMALFDB ;
}
if ( special - > type = = AC_SPECIAL_ALT_CHAR ) {
2017-08-08 17:38:17 -04:00
( special - > alt ) . byte [ i ] = ( unsigned char ) * c ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , c ) ;
2015-05-14 12:23:56 -04:00
} else {
2018-12-03 12:40:13 -05:00
( special - > alt ) . f_str [ i ] = ( unsigned char * ) c ;
2015-05-14 12:23:56 -04:00
}
special - > num + + ;
}
/* sorting byte alternates */
if ( special - > num > 1 & & special - > type = = AC_SPECIAL_ALT_CHAR )
2015-05-21 15:04:22 -04:00
cli_qsort ( ( special - > alt ) . byte , special - > num , sizeof ( unsigned char ) , qcompare_byte ) ;
/* sorting str alternates */
if ( special - > num > 1 & & special - > type = = AC_SPECIAL_ALT_STR_FIXED )
cli_qsort_r ( ( special - > alt ) . f_str , special - > num , sizeof ( unsigned char * ) , qcompare_fstr , & ( special - > len ) ) ;
2015-05-13 17:48:22 -04:00
} else { /* generic alternates */
2015-05-14 12:23:56 -04:00
char * subexpr ;
if ( special - > negative ) {
cli_errmsg ( " ac_special_altstr: Can't apply negation operation to generic alternate strings \n " ) ;
free ( hexprcpy ) ;
return CL_EMALFDB ;
}
2015-05-11 11:55:43 -04:00
2015-05-14 12:23:56 -04:00
special - > type = AC_SPECIAL_ALT_STR ;
2015-05-11 11:55:43 -04:00
2015-05-14 12:23:56 -04:00
/* allocate reusable subexpr */
2024-01-09 17:17:48 -05:00
if ( ! ( subexpr = calloc ( slen + 1 , sizeof ( char ) ) ) ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " ac_special_altstr: Can't allocate subexpr container \n " ) ;
2015-08-17 12:30:07 -04:00
free ( hexprcpy ) ;
2015-05-14 12:23:56 -04:00
return CL_EMEM ;
}
2015-05-11 11:55:43 -04:00
2018-12-03 12:40:13 -05:00
ret = ac_special_altexpand ( hexprcpy , subexpr , slen + 1 , 0 , 0 , sigopts , special , root ) ;
2015-05-13 17:48:22 -04:00
2015-05-14 12:23:56 -04:00
free ( subexpr ) ;
free ( hexprcpy ) ;
return ret ;
2015-05-11 11:55:43 -04:00
}
2015-05-13 17:48:22 -04:00
free ( hexprcpy ) ;
2015-05-11 11:55:43 -04:00
return CL_SUCCESS ;
}
2007-04-28 18:40:59 +00:00
/* FIXME: clean up the code */
2019-02-27 00:47:38 -05:00
cl_error_t cli_ac_addsig ( struct cli_matcher * root , const char * virname , const char * hexsig , uint8_t sigopts , uint32_t sigid , uint16_t parts , uint16_t partno , uint16_t rtype , uint16_t type , uint32_t mindist , uint32_t maxdist , const char * offset , const uint32_t * lsigid , unsigned int options )
2007-04-28 18:40:59 +00:00
{
2014-10-29 15:14:37 -04:00
struct cli_ac_patt * new ;
char * pt , * pt2 , * hex = NULL , * hexcpy = NULL ;
uint16_t i , j , ppos = 0 , pend , * dec , nzpos = 0 ;
2015-02-20 18:13:28 -05:00
uint8_t wprefix = 0 , zprefix = 1 , plen = 0 , nzplen = 0 ;
2017-09-21 13:10:01 -04:00
struct cli_ac_special * newspecial , * * newtable ;
2014-10-29 15:14:37 -04:00
int ret , error = CL_SUCCESS ;
2021-04-21 16:24:24 -07:00
char * virname_copy = NULL ;
2007-04-28 18:40:59 +00:00
2018-12-03 12:40:13 -05:00
if ( ! root ) {
2014-10-29 15:14:37 -04:00
cli_errmsg ( " cli_ac_addsig: root == NULL \n " ) ;
return CL_ENULLARG ;
2008-07-25 19:00:25 +00:00
}
2018-12-03 12:40:13 -05:00
if ( strlen ( hexsig ) / 2 < root - > ac_mindepth ) {
2014-10-29 15:14:37 -04:00
cli_errmsg ( " cli_ac_addsig: Signature for %s is too short \n " , virname ) ;
return CL_EMALFDB ;
2009-02-12 13:53:23 +00:00
}
2019-05-03 18:16:03 -04:00
if ( ( new = ( struct cli_ac_patt * ) MPOOL_CALLOC ( root - > mempool , 1 , sizeof ( struct cli_ac_patt ) ) ) = = NULL )
2014-10-29 15:14:37 -04:00
return CL_EMEM ;
2007-04-28 18:40:59 +00:00
2018-12-03 12:40:13 -05:00
new - > rtype = rtype ;
new - > type = type ;
new - > sigid = sigid ;
new - > parts = parts ;
new - > partno = partno ;
new - > mindist = mindist ;
new - > maxdist = maxdist ;
2008-07-25 20:01:40 +00:00
new - > customdata = NULL ;
2008-02-06 12:26:16 +00:00
new - > ch [ 0 ] | = CLI_MATCH_IGNORE ;
new - > ch [ 1 ] | = CLI_MATCH_IGNORE ;
2018-12-03 12:40:13 -05:00
if ( lsigid ) {
2014-10-29 15:14:37 -04:00
new - > lsigid [ 0 ] = 1 ;
memcpy ( & new - > lsigid [ 1 ] , lsigid , 2 * sizeof ( uint32_t ) ) ;
2008-07-25 19:00:25 +00:00
}
2008-02-06 12:26:16 +00:00
2018-12-03 12:40:13 -05:00
if ( strchr ( hexsig , ' [ ' ) ) {
2024-01-09 17:44:33 -05:00
if ( ! ( hexcpy = cli_safer_strdup ( hexsig ) ) ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
return CL_EMEM ;
}
2008-02-06 12:26:16 +00:00
2014-10-29 15:14:37 -04:00
hex = hexcpy ;
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < 2 ; i + + ) {
2015-02-11 09:02:27 -08:00
unsigned int n , n1 , n2 ;
2008-02-06 12:26:16 +00:00
2018-12-03 12:40:13 -05:00
if ( ! ( pt = strchr ( hex , ' [ ' ) ) )
2014-10-29 15:14:37 -04:00
break ;
2008-02-06 12:26:16 +00:00
2014-10-29 15:14:37 -04:00
* pt + + = 0 ;
2008-02-06 12:26:16 +00:00
2018-12-03 12:40:13 -05:00
if ( ! ( pt2 = strchr ( pt , ' ] ' ) ) ) {
2014-10-29 15:14:37 -04:00
cli_dbgmsg ( " cli_ac_addsig: missing closing square bracket \n " ) ;
error = CL_EMALFDB ;
break ;
}
2008-02-06 12:26:16 +00:00
2014-10-29 15:14:37 -04:00
* pt2 + + = 0 ;
2008-02-06 12:26:16 +00:00
2015-02-11 09:02:27 -08:00
n = sscanf ( pt , " %u-%u " , & n1 , & n2 ) ;
2018-12-03 12:40:13 -05:00
if ( n = = 1 ) {
2015-02-11 09:02:27 -08:00
n2 = n1 ;
2018-12-03 12:40:13 -05:00
} else if ( n ! = 2 ) {
2014-10-29 15:14:37 -04:00
cli_dbgmsg ( " cli_ac_addsig: incorrect range inside square brackets \n " ) ;
error = CL_EMALFDB ;
break ;
}
2018-12-03 12:40:13 -05:00
if ( ( n1 > n2 ) | | ( n2 > AC_CH_MAXDIST ) ) {
2014-10-29 15:14:37 -04:00
cli_dbgmsg ( " cli_ac_addsig: incorrect range inside square brackets \n " ) ;
error = CL_EMALFDB ;
break ;
}
2018-12-03 12:40:13 -05:00
if ( strlen ( hex ) = = 2 ) {
if ( i ) {
2014-10-29 15:14:37 -04:00
error = CL_EMALFDB ;
break ;
}
dec = cli_hex2ui ( hex ) ;
2018-12-03 12:40:13 -05:00
if ( ! dec ) {
2014-10-29 15:14:37 -04:00
error = CL_EMALFDB ;
break ;
}
2018-12-03 12:40:13 -05:00
if ( ( sigopts & ACPATT_OPTION_NOCASE ) & & ( ( * dec & CLI_MATCH_METADATA ) = = CLI_MATCH_CHAR ) )
2019-08-22 16:51:01 -04:00
new - > ch [ i ] = CLI_NOCASE ( * dec ) | CLI_MATCH_NOCASE ;
2015-02-11 10:20:07 -08:00
else
new - > ch [ i ] = * dec ;
2014-10-29 15:14:37 -04:00
free ( dec ) ;
new - > ch_mindist [ i ] = n1 ;
new - > ch_maxdist [ i ] = n2 ;
2018-12-03 12:40:13 -05:00
hex = pt2 ;
} else if ( strlen ( pt2 ) = = 2 ) {
i = 1 ;
2014-10-29 15:14:37 -04:00
dec = cli_hex2ui ( pt2 ) ;
2018-12-03 12:40:13 -05:00
if ( ! dec ) {
2014-10-29 15:14:37 -04:00
error = CL_EMALFDB ;
break ;
}
2018-12-03 12:40:13 -05:00
if ( ( sigopts & ACPATT_OPTION_NOCASE ) & & ( ( * dec & CLI_MATCH_METADATA ) = = CLI_MATCH_CHAR ) )
2019-08-22 16:51:01 -04:00
new - > ch [ i ] = CLI_NOCASE ( * dec ) | CLI_MATCH_NOCASE ;
2015-02-11 10:20:07 -08:00
else
new - > ch [ i ] = * dec ;
2014-10-29 15:14:37 -04:00
free ( dec ) ;
new - > ch_mindist [ i ] = n1 ;
new - > ch_maxdist [ i ] = n2 ;
} else {
error = CL_EMALFDB ;
break ;
}
}
2018-12-03 12:40:13 -05:00
if ( error ) {
2014-10-29 15:14:37 -04:00
free ( hexcpy ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
return error ;
}
2024-01-09 17:44:33 -05:00
hex = cli_safer_strdup ( hex ) ;
2014-10-29 15:14:37 -04:00
free ( hexcpy ) ;
2018-12-03 12:40:13 -05:00
if ( ! hex ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
return CL_EMEM ;
}
2008-02-06 12:26:16 +00:00
}
2007-04-28 18:40:59 +00:00
2018-12-03 12:40:13 -05:00
if ( strchr ( hexsig , ' ( ' ) ) {
2015-05-14 12:23:56 -04:00
char * hexnew , * start ;
2018-11-16 11:50:48 -08:00
size_t nest ;
2015-05-14 12:23:56 -04:00
size_t hexnewsz ;
2018-12-03 12:40:13 -05:00
if ( hex ) {
2015-05-14 12:23:56 -04:00
hexcpy = hex ;
2024-01-09 17:44:33 -05:00
} else if ( ! ( hexcpy = cli_safer_strdup ( hexsig ) ) ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2015-05-14 12:23:56 -04:00
return CL_EMEM ;
}
hexnewsz = strlen ( hexsig ) + 1 ;
2024-01-09 17:17:48 -05:00
if ( ! ( hexnew = ( char * ) calloc ( 1 , hexnewsz ) ) ) {
2022-03-22 20:06:22 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2015-05-14 12:23:56 -04:00
free ( hexcpy ) ;
return CL_EMEM ;
}
start = pt = hexcpy ;
2018-12-03 12:40:13 -05:00
while ( ( pt = strchr ( start , ' ( ' ) ) ) {
2015-05-14 12:23:56 -04:00
* pt + + = 0 ;
2018-12-03 12:40:13 -05:00
if ( ! start ) {
2015-05-14 12:23:56 -04:00
error = CL_EMALFDB ;
break ;
}
2019-05-03 18:16:03 -04:00
newspecial = ( struct cli_ac_special * ) MPOOL_CALLOC ( root - > mempool , 1 , sizeof ( struct cli_ac_special ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! newspecial ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " cli_ac_addsig: Can't allocate newspecial \n " ) ;
error = CL_EMEM ;
break ;
}
2018-12-03 12:40:13 -05:00
if ( pt > = hexcpy + 2 ) {
if ( pt [ - 2 ] = = ' ! ' ) {
2015-05-14 12:23:56 -04:00
newspecial - > negative = 1 ;
2018-12-03 12:40:13 -05:00
pt [ - 2 ] = 0 ;
2015-05-14 12:23:56 -04:00
}
}
cli_strlcat ( hexnew , start , hexnewsz ) ;
nest = find_paren_end ( pt , & start ) ;
2018-12-03 12:40:13 -05:00
if ( ! start ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " cli_ac_addsig: Missing closing parenthesis \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
error = CL_EMALFDB ;
break ;
}
* start + + = 0 ;
2018-12-03 12:40:13 -05:00
if ( ! strlen ( pt ) ) {
2015-05-14 12:23:56 -04:00
cli_errmsg ( " cli_ac_addsig: Empty block \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
error = CL_EMALFDB ;
break ;
}
2015-05-22 10:51:48 -04:00
if ( nest > ACPATT_ALTN_MAXNEST ) {
cli_errmsg ( " ac_addspecial: Expression exceeds maximum alternate nesting limit \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2016-07-12 12:46:16 -04:00
error = CL_EMALFDB ;
break ;
2015-05-14 12:23:56 -04:00
}
2022-02-12 14:53:44 -08:00
/*
* Detect special character classes
* - ( B ) word boundary
* - ( L ) CR , CRLF line boundaries
* - ( W ) Non - alphanumeric character
*
* For more details : https : //docs.clamav.net/manual/Signatures/BodySignatureFormat.html#character-classes
*/
2018-12-03 12:40:13 -05:00
if ( ! strcmp ( pt , " B " ) ) {
if ( ! * start ) {
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_BOUNDARY_RIGHT ;
2018-12-03 12:40:13 -05:00
if ( newspecial - > negative )
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_BOUNDARY_RIGHT_NEGATIVE ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
continue ;
2018-12-03 12:40:13 -05:00
} else if ( pt - 1 = = hexcpy ) {
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_BOUNDARY_LEFT ;
2018-12-03 12:40:13 -05:00
if ( newspecial - > negative )
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_BOUNDARY_LEFT_NEGATIVE ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
continue ;
}
2018-12-03 12:40:13 -05:00
} else if ( ! strcmp ( pt , " L " ) ) {
if ( ! * start ) {
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_LINE_MARKER_RIGHT ;
2018-12-03 12:40:13 -05:00
if ( newspecial - > negative )
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_LINE_MARKER_RIGHT_NEGATIVE ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
continue ;
2018-12-03 12:40:13 -05:00
} else if ( pt - 1 = = hexcpy ) {
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_LINE_MARKER_LEFT ;
2018-12-03 12:40:13 -05:00
if ( newspecial - > negative )
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_LINE_MARKER_LEFT_NEGATIVE ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
continue ;
}
2018-12-03 12:40:13 -05:00
} else if ( ! strcmp ( pt , " W " ) ) {
if ( ! * start ) {
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_WORD_MARKER_RIGHT ;
2018-12-03 12:40:13 -05:00
if ( newspecial - > negative )
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_WORD_MARKER_RIGHT_NEGATIVE ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
continue ;
2018-12-03 12:40:13 -05:00
} else if ( pt - 1 = = hexcpy ) {
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_WORD_MARKER_LEFT ;
2018-12-03 12:40:13 -05:00
if ( newspecial - > negative )
2015-05-14 12:23:56 -04:00
new - > boundary | = AC_WORD_MARKER_LEFT_NEGATIVE ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
continue ;
}
}
cli_strlcat ( hexnew , " () " , hexnewsz ) ;
new - > special + + ;
2019-05-03 18:16:03 -04:00
newtable = ( struct cli_ac_special * * ) MPOOL_REALLOC ( root - > mempool , new - > special_table , new - > special * sizeof ( struct cli_ac_special * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! newtable ) {
2015-05-14 12:23:56 -04:00
new - > special - - ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , newspecial ) ;
2015-05-14 12:23:56 -04:00
cli_errmsg ( " cli_ac_addsig: Can't realloc new->special_table \n " ) ;
error = CL_EMEM ;
break ;
}
newtable [ new - > special - 1 ] = newspecial ;
2018-12-03 12:40:13 -05:00
new - > special_table = newtable ;
2015-05-14 12:23:56 -04:00
2018-12-03 12:40:13 -05:00
if ( ! strcmp ( pt , " B " ) ) {
2015-05-14 12:23:56 -04:00
newspecial - > type = AC_SPECIAL_BOUNDARY ;
2018-12-03 12:40:13 -05:00
} else if ( ! strcmp ( pt , " L " ) ) {
2015-05-14 12:23:56 -04:00
newspecial - > type = AC_SPECIAL_LINE_MARKER ;
2018-12-03 12:40:13 -05:00
} else if ( ! strcmp ( pt , " W " ) ) {
2015-05-14 12:23:56 -04:00
newspecial - > type = AC_SPECIAL_WORD_MARKER ;
} else {
2015-05-19 12:04:59 -04:00
if ( ( ret = ac_special_altstr ( pt , sigopts , newspecial , root ) ) ! = CL_SUCCESS ) {
2015-05-14 12:23:56 -04:00
error = ret ;
break ;
}
}
}
2018-12-03 12:40:13 -05:00
if ( start )
2015-05-14 12:23:56 -04:00
cli_strlcat ( hexnew , start , hexnewsz ) ;
hex = hexnew ;
free ( hexcpy ) ;
2018-12-03 12:40:13 -05:00
if ( error ) {
2015-05-14 12:23:56 -04:00
free ( hex ) ;
2018-12-03 12:40:13 -05:00
if ( new - > special ) {
2015-05-14 12:23:56 -04:00
mpool_ac_free_special ( root - > mempool , new ) ;
}
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2015-05-14 12:23:56 -04:00
return error ;
}
2007-04-28 18:40:59 +00:00
}
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
/*
* Convert the hex string pattern to a uint16_t * pattern , where each element holds flags + a byte .
*/
2019-05-03 18:16:03 -04:00
new - > pattern = CLI_MPOOL_HEX2UI ( root - > mempool , hex ? hex : hexsig ) ;
2018-12-03 12:40:13 -05:00
if ( new - > pattern = = NULL ) {
if ( new - > special )
2014-10-29 15:14:37 -04:00
mpool_ac_free_special ( root - > mempool , new ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
free ( hex ) ;
return CL_EMALFDB ;
2007-04-28 18:40:59 +00:00
}
2008-10-17 17:00:13 +00:00
2017-08-31 16:38:41 -04:00
new - > length [ 0 ] = ( uint16_t ) strlen ( hex ? hex : hexsig ) / 2 ;
2022-06-04 12:08:51 -07:00
if ( new - > length [ 0 ] < root - > ac_mindepth ) {
cli_errmsg ( " cli_ac_addsig: Subpattern in signature is shorter than the minimum depth of the AC trie. (%u < %u) \n " , new - > length [ 0 ] , root - > ac_mindepth ) ;
if ( new - > special )
mpool_ac_free_special ( root - > mempool , new ) ;
MPOOL_FREE ( root - > mempool , new - > pattern ) ;
MPOOL_FREE ( root - > mempool , new ) ;
free ( hex ) ;
return CL_EMALFDB ;
}
2018-12-03 12:40:13 -05:00
for ( i = 0 , j = 0 ; i < new - > length [ 0 ] ; i + + ) {
if ( ( new - > pattern [ i ] & CLI_MATCH_METADATA ) = = CLI_MATCH_SPECIAL ) {
2015-07-02 15:06:04 -04:00
new - > length [ 1 ] + = new - > special_table [ j ] - > len [ 0 ] ;
new - > length [ 2 ] + = new - > special_table [ j ] - > len [ 1 ] ;
j + + ;
} else {
new - > length [ 1 ] + + ;
new - > length [ 2 ] + + ;
}
}
2008-02-06 12:26:16 +00:00
free ( hex ) ;
2007-04-28 18:40:59 +00:00
2015-02-20 18:13:28 -05:00
new - > sigopts = sigopts ;
2015-05-14 14:44:06 -04:00
/* setting nocase match */
2015-02-20 18:13:28 -05:00
if ( sigopts & ACPATT_OPTION_NOCASE ) {
2015-07-02 15:06:04 -04:00
for ( i = 0 ; i < new - > length [ 0 ] ; i + + )
2015-05-14 12:23:56 -04:00
if ( ( new - > pattern [ i ] & CLI_MATCH_METADATA ) = = CLI_MATCH_CHAR ) {
2019-08-22 16:51:01 -04:00
new - > pattern [ i ] = CLI_NOCASE ( new - > pattern [ i ] & 0xff ) ;
2015-05-14 12:23:56 -04:00
new - > pattern [ i ] + = CLI_MATCH_NOCASE ;
}
2015-02-05 20:52:18 -08:00
}
2015-05-22 10:51:48 -04:00
/* TODO - sigopts effect on filters? */
if ( root - > filter ) {
2014-10-29 15:14:37 -04:00
/* so that we can show meaningful messages */
2018-12-03 12:40:13 -05:00
new - > virname = ( char * ) virname ;
2014-10-29 15:14:37 -04:00
if ( filter_add_acpatt ( root - > filter , new ) = = - 1 ) {
2022-02-12 14:53:44 -08:00
cli_warnmsg ( " cli_ac_addsig: cannot use filter for trie \n " ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , root - > filter ) ;
2014-10-29 15:14:37 -04:00
root - > filter = NULL ;
2023-06-12 18:03:45 -07:00
return CL_EMALFDB ;
2014-10-29 15:14:37 -04:00
}
/* TODO: should this affect maxpatlen? */
2010-02-10 11:39:47 +02:00
}
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
/*
* Check beginning bytes of the pattern up to the max - depth of the AC trie to see if :
* a . it contains a wildcard , or
* b . the bytes are all zeroes .
*
* If it does , we can try to shift the start of the pattern to the right , and have those beginning
* bytes be a " prefix " which gets backwards - matched after the AC match .
* This happens in the call to ac_backward_match_branch ( ) in ac_forward_match_branch ( )
*/
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < root - > ac_maxdepth & & i < new - > length [ 0 ] ; i + + ) {
if ( new - > pattern [ i ] & CLI_MATCH_WILDCARD ) {
2014-10-29 15:14:37 -04:00
wprefix = 1 ;
break ;
}
2022-06-04 12:08:51 -07:00
if ( zprefix & & 0 ! = new - > pattern [ i ] ) {
2014-10-29 15:14:37 -04:00
zprefix = 0 ;
2022-06-04 12:08:51 -07:00
}
2007-04-28 18:40:59 +00:00
}
2018-12-03 12:40:13 -05:00
if ( wprefix | | zprefix ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
/*
* This pattern has a wildcard in the first few bytes or starts with some zeroes .
* We ' ll try to shift the start of the pattern right a bit to find a static subpattern to use for the bytes that go in the A - C trie .
*/
// If needed, we can shift the start of the pattern that goes in the A-C Trie right up to the pattern length minus min-depth bytes
// The original starting bytes will become a "prefix" that gets backward-matched.
2015-07-02 14:41:37 -04:00
pend = new - > length [ 0 ] - root - > ac_mindepth + 1 ;
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// Search for a run of static bytes to start the pattern in the A-C trie: it must start early enough to leave at least min-depth bytes, and is at most max-depth bytes long.
2018-12-03 12:40:13 -05:00
for ( i = 0 ; i < pend ; i + + ) {
for ( j = i ; j < i + root - > ac_maxdepth & & j < new - > length [ 0 ] ; j + + ) {
if ( new - > pattern [ j ] & CLI_MATCH_WILDCARD ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// Found a wildcard. Shift the pattern start right a byte, relegating this byte to the "prefix"
2014-10-29 15:14:37 -04:00
break ;
}
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// This byte is a contender for the start of the pattern.
// Record the start + length of the shifted prefix.
2022-06-04 12:08:51 -07:00
if ( j - i + 1 > = plen ) {
plen = j - i + 1 ;
ppos = i ;
}
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// Check whether the first two bytes at this offset are not both zero. If at least one of them is non-zero, then that's even better.
2022-06-04 12:08:51 -07:00
if ( ( 0 ! = new - > pattern [ ppos ] ) | |
( ( new - > length [ 0 ] > ppos + 1 ) & & ( 0 ! = new - > pattern [ ppos + 1 ] ) ) ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// At least one of the first two bytes is non-zero which would be better than starting with two zeroes.
2022-06-04 12:08:51 -07:00
2018-12-03 12:40:13 -05:00
if ( plen > = root - > ac_maxdepth ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// But... we hit max-depth, so nevermind. Let's stop searching.
2014-10-29 15:14:37 -04:00
break ;
2022-06-04 12:08:51 -07:00
}
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// Save off the position and length so we can roll back to it later, if needed.
2022-06-04 12:08:51 -07:00
if ( plen > = root - > ac_mindepth & & plen > nzplen ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// We've found a longer sequence of non-zero bytes we could use for the AC pattern starting position.
// Store off the length and position of this starting position with the non-zero bytes, in case we want to roll back to it.
2014-10-29 15:14:37 -04:00
nzplen = plen ;
2018-12-03 12:40:13 -05:00
nzpos = ppos ;
2014-10-29 15:14:37 -04:00
}
}
}
2007-04-28 18:40:59 +00:00
2022-06-04 12:08:51 -07:00
if ( plen > = root - > ac_maxdepth & & ( 0 ! = new - > pattern [ ppos ] | | 0 ! = new - > pattern [ ppos + 1 ] ) ) {
2014-10-29 15:14:37 -04:00
break ;
2022-06-04 12:08:51 -07:00
}
2014-10-29 15:14:37 -04:00
}
2022-06-04 12:08:51 -07:00
if ( ( 0 ! = nzplen ) & &
( new - > length [ 0 ] > ppos + 1 ) & &
( 0 = = new - > pattern [ ppos ] ) & &
( 0 = = new - > pattern [ ppos + 1 ] ) ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// The latest shifted position starts with two zeroes.
// We found a valid static pattern earlier that doesn't start with two zeroes.
// Let's roll back a little bit to use that instead.
2014-10-29 15:14:37 -04:00
plen = nzplen ;
ppos = nzpos ;
}
2018-12-03 12:40:13 -05:00
if ( plen < root - > ac_mindepth ) {
2014-10-29 15:14:37 -04:00
cli_errmsg ( " cli_ac_addsig: Can't find a static subpattern of length %u \n " , root - > ac_mindepth ) ;
mpool_ac_free_special ( root - > mempool , new ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new - > pattern ) ;
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
return CL_EMALFDB ;
}
2007-04-28 18:40:59 +00:00
2023-11-26 15:01:19 -08:00
// Store those initial bytes as the pattern "prefix" (the stuff before what goes in the AC Trie)
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
new - > prefix = new - > pattern ;
// The "prefix" length is the number of bytes before the starting position of the pattern that goes in the AC Trie.
2015-07-02 14:41:37 -04:00
new - > prefix_length [ 0 ] = ppos ;
2018-12-03 12:40:13 -05:00
for ( i = 0 , j = 0 ; i < new - > prefix_length [ 0 ] ; i + + ) {
if ( ( new - > prefix [ i ] & CLI_MATCH_WILDCARD ) = = CLI_MATCH_SPECIAL )
2014-10-29 15:14:37 -04:00
new - > special_pattern + + ;
2015-07-02 15:06:04 -04:00
2018-12-03 12:40:13 -05:00
if ( ( new - > prefix [ i ] & CLI_MATCH_METADATA ) = = CLI_MATCH_SPECIAL ) {
2015-07-02 15:06:04 -04:00
new - > prefix_length [ 1 ] + = new - > special_table [ j ] - > len [ 0 ] ;
new - > prefix_length [ 2 ] + = new - > special_table [ j ] - > len [ 1 ] ;
j + + ;
} else {
new - > prefix_length [ 1 ] + + ;
new - > prefix_length [ 2 ] + + ;
}
}
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// Update the pattern to start at the shifted position with the static bytes.
2015-07-02 15:06:04 -04:00
new - > pattern = & new - > prefix [ ppos ] ;
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// And update the pattern length to remove the prefix bytes.
2015-07-02 15:06:04 -04:00
new - > length [ 0 ] - = new - > prefix_length [ 0 ] ;
new - > length [ 1 ] - = new - > prefix_length [ 1 ] ;
new - > length [ 2 ] - = new - > prefix_length [ 2 ] ;
2007-04-28 18:40:59 +00:00
}
2022-06-04 12:08:51 -07:00
if ( new - > length [ 2 ] + new - > prefix_length [ 2 ] > root - > maxpatlen ) {
Add code comments to explain AC pattern prefix process
When adding a pattern to the AC trie, checks are done to make sure the
bytes that go in the AC trie don't have any `?` wildcards and
additionally that the first two bytes are not "\x00\x00".
If they are, the position of the pattern that goes in the AC trie can be
shifted right until a static pattern is identified that can go in the
AC trie. Any bytes to the left of the new start of the pattern become a
"prefix".
During matching, once the AC trie match occurs and the bytes to the
right of that pattern are matched, then the bytes from the prefix are
matched.
The reason that we don't want the bytes that go in the AC trie to start
with "\x00\x00" is that it is such a common pattern in files that it
would match constantly, and the scan process would spend a lot of time
just checking through the list of patterns associated with a "\x00\x00"
AC match, and that'd be crazy slow.
But it is important to note that when shifting right, if there aren't
enough nonzero, non-wildcard bytes to form a good prefix for the AC
trie, that it is tolerable to bend the rule and let some patterns start
with "\x00\x00". In that way, a small pattern like "0000ab" is still
valid, and can be matched.
2022-06-04 12:38:55 -07:00
// This is the longest pattern we've stored. Update our max-pattern-length record
2015-07-02 15:37:19 -04:00
root - > maxpatlen = new - > length [ 2 ] + new - > prefix_length [ 2 ] ;
2022-06-04 12:08:51 -07:00
}
2007-04-28 18:40:59 +00:00
2021-04-21 16:24:24 -07:00
if ( 0 = = new - > lsigid [ 0 ] ) {
/* For logical signatures, we already recorded the virname in the lsig table entry.
* For other signature types , continue to store a copy of the virname in each ac_pattern struct .
*
* TODO: Don't make a copy of the virname for every ac pattern,
* because that makes for multiple copies every time a signature has wildcards.
*/
virname_copy = CLI_MPOOL_VIRNAME ( root - > mempool , virname , options & CL_DB_OFFICIAL ) ;
if ( NULL = = virname_copy ) {
MPOOL_FREE ( root - > mempool , new - > prefix ? new - > prefix : new - > pattern ) ;
mpool_ac_free_special ( root - > mempool , new ) ;
MPOOL_FREE ( root - > mempool , new ) ;
return CL_EMEM ;
}
2007-04-28 18:40:59 +00:00
2021-04-21 16:24:24 -07:00
new - > virname = virname_copy ;
}
2008-07-25 19:00:25 +00:00
2010-06-18 15:41:39 +02:00
ret = cli_caloff ( offset , NULL , root - > type , new - > offdata , & new - > offset_min , & new - > offset_max ) ;
2018-12-03 12:40:13 -05:00
if ( ret ! = CL_SUCCESS ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new - > prefix ? new - > prefix : new - > pattern ) ;
2014-10-29 15:14:37 -04:00
mpool_ac_free_special ( root - > mempool , new ) ;
2022-04-13 12:09:01 -07:00
if ( virname_copy ) {
MPOOL_FREE ( root - > mempool , virname_copy ) ;
}
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
return ret ;
2007-10-03 00:31:52 +00:00
}
2018-12-03 12:40:13 -05:00
if ( ( ret = cli_ac_addpatt ( root , new ) ) ) {
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new - > prefix ? new - > prefix : new - > pattern ) ;
2022-04-13 12:09:01 -07:00
if ( virname_copy ) {
MPOOL_FREE ( root - > mempool , virname_copy ) ;
}
2014-10-29 15:14:37 -04:00
mpool_ac_free_special ( root - > mempool , new ) ;
2019-05-03 18:16:03 -04:00
MPOOL_FREE ( root - > mempool , new ) ;
2014-10-29 15:14:37 -04:00
return ret ;
2007-04-28 18:40:59 +00:00
}
2021-07-16 11:47:23 -07:00
if ( ( new - > offdata [ 0 ] ! = CLI_OFF_ANY ) & &
( new - > offdata [ 0 ] ! = CLI_OFF_ABSOLUTE ) & &
( new - > offdata [ 0 ] ! = CLI_OFF_MACRO ) ) {
2019-05-03 18:16:03 -04:00
root - > ac_reloff = ( struct cli_ac_patt * * ) MPOOL_REALLOC2 ( root - > mempool , root - > ac_reloff , ( root - > ac_reloff_num + 1 ) * sizeof ( struct cli_ac_patt * ) ) ;
2018-12-03 12:40:13 -05:00
if ( ! root - > ac_reloff ) {
2014-10-29 15:14:37 -04:00
cli_errmsg ( " cli_ac_addsig: Can't allocate memory for root->ac_reloff \n " ) ;
return CL_EMEM ;
}
root - > ac_reloff [ root - > ac_reloff_num ] = new ;
2018-12-03 12:40:13 -05:00
new - > offset_min = root - > ac_reloff_num * 2 ;
new - > offset_max = new - > offset_min + 1 ;
2014-10-29 15:14:37 -04:00
root - > ac_reloff_num + + ;
2009-08-14 14:38:13 +02:00
}
2007-04-28 18:40:59 +00:00
return CL_SUCCESS ;
}