mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-11-02 00:50:54 +00:00
uniq update
git-svn: trunk@4069
This commit is contained in:
parent
937ade082c
commit
53ed2cb75c
5 changed files with 47 additions and 152 deletions
|
|
@ -1,3 +1,8 @@
|
|||
Sun Aug 3 23:09:44 CEST 2008 (acab)
|
||||
------------------------------------
|
||||
* libclamav/uniq: faster md5 lookup
|
||||
* sigtool: sync
|
||||
|
||||
Sun Aug 3 16:12:17 CEST 2008 (acab)
|
||||
------------------------------------
|
||||
* libclamav: use md5 based lookup for ole2/vba instead of hashtab (bb#1071)
|
||||
|
|
|
|||
149
libclamav/uniq.c
149
libclamav/uniq.c
|
|
@ -24,118 +24,21 @@
|
|||
#include "clamav-config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#if HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "uniq.h"
|
||||
#include "others.h"
|
||||
#include "md5.h"
|
||||
|
||||
#if 0
|
||||
struct uniq *uniq_init(uint32_t count) {
|
||||
struct uniq *U;
|
||||
uint32_t i;
|
||||
|
||||
if(!count) return NULL;
|
||||
U = cli_calloc(1, sizeof(*U));
|
||||
if(!U) return NULL;
|
||||
if(cli_ac_init(&U->matcher, 16, 16)) {
|
||||
uniq_free(U);
|
||||
return NULL;
|
||||
}
|
||||
U->custs = cli_calloc(count, sizeof(U->custs));
|
||||
if(!U->custs) {
|
||||
uniq_free(U);
|
||||
return NULL;
|
||||
}
|
||||
U->patts = cli_calloc(count, sizeof(U->patts));
|
||||
if(!U->patts) {
|
||||
uniq_free(U);
|
||||
return NULL;
|
||||
}
|
||||
U->md5s = cli_malloc(count*sizeof(U->md5s));
|
||||
if(!U->md5s) {
|
||||
uniq_free(U);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
U->entries = count;
|
||||
|
||||
for(i=0; i<count; i++) {
|
||||
U->patts[i].pattern = U->md5s[i].md5;
|
||||
U->patts[i].length = 16;
|
||||
U->patts[i].ch[0] = U->patts[i].ch[1] |= CLI_MATCH_IGNORE;
|
||||
U->patts[i].customdata = &U->custs[i];
|
||||
}
|
||||
|
||||
return U;
|
||||
}
|
||||
|
||||
void uniq_free(struct uniq *U) {
|
||||
uint32_t i;
|
||||
U->matcher.ac_patterns = 0; /* don't free my arrays! */
|
||||
cli_ac_free(&U->matcher);
|
||||
if(U->custs) free(U->custs);
|
||||
if(U->patts) free(U->patts);
|
||||
if(U->md5s) free(U->md5s);
|
||||
free(U);
|
||||
}
|
||||
|
||||
|
||||
uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
|
||||
uint8_t digest[16];
|
||||
struct UNIQCUST *cust;
|
||||
struct cli_ac_data mdata;
|
||||
|
||||
cli_md5_ctx md5;
|
||||
cli_md5_init(&md5);
|
||||
cli_md5_update(&md5, key, key_len);
|
||||
cli_md5_final(digest, &md5);
|
||||
|
||||
cli_ac_initdata(&mdata, 0, 0, AC_DEFAULT_TRACKLEN); /* This can't fail as we don't have parts or lsigs */
|
||||
if (cli_ac_scanbuff(digest,16, NULL, (void *)&cust, NULL, &U->matcher, &mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL)!=CL_VIRUS) {
|
||||
int i;
|
||||
char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
||||
struct cli_ac_patt *patt = &U->patts[U->matcher.ac_patterns];
|
||||
|
||||
cust = patt->customdata;
|
||||
for(i = 0; i < 16; i++) {
|
||||
cust->name[i*2] = HEX[digest[i]>>4 & 0xf];
|
||||
cust->name[i*2+1] = HEX[digest[i] & 0xf];
|
||||
patt->pattern[i] = digest[i];
|
||||
}
|
||||
cli_ac_addpatt(&U->matcher,patt); /* FIXME this can fail */
|
||||
cli_ac_buildtrie(&U->matcher);
|
||||
}
|
||||
|
||||
cust->count++;
|
||||
if(rhash) *rhash = cust->name;
|
||||
return cust->count;
|
||||
}
|
||||
|
||||
uint32_t uniq_get(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
|
||||
uint8_t digest[16];
|
||||
struct UNIQCUST *cust;
|
||||
struct cli_ac_data mdata;
|
||||
|
||||
cli_md5_ctx md5;
|
||||
cli_md5_init(&md5);
|
||||
cli_md5_update(&md5, key, key_len);
|
||||
cli_md5_final(digest, &md5);
|
||||
|
||||
cli_ac_initdata(&mdata, 0, 0, AC_DEFAULT_TRACKLEN); /* This can't fail as we don't have parts or lsigs */
|
||||
if (cli_ac_scanbuff(digest,16, NULL, (void *)&cust, NULL, &U->matcher, &mdata,0,0,-1,NULL,AC_SCAN_VIR,NULL)!=CL_VIRUS)
|
||||
return 0;
|
||||
|
||||
if(rhash) *rhash = cust->name;
|
||||
return cust->count;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <string.h>
|
||||
|
||||
struct uniq *uniq_init(uint32_t count) {
|
||||
struct uniq *U;
|
||||
|
||||
if(!count) return NULL;
|
||||
U = cli_malloc(sizeof(*U));
|
||||
if(!U) return NULL;
|
||||
|
||||
U->md5s = cli_malloc(count * sizeof(*U->md5s));
|
||||
if(!U->md5s) {
|
||||
|
|
@ -143,7 +46,6 @@ struct uniq *uniq_init(uint32_t count) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
U->items = 0;
|
||||
return U;
|
||||
}
|
||||
|
||||
|
|
@ -156,22 +58,29 @@ uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhas
|
|||
unsigned int i;
|
||||
uint8_t digest[16];
|
||||
cli_md5_ctx md5;
|
||||
struct UNIQMD5 *m;
|
||||
struct UNIQMD5 *m = NULL;
|
||||
|
||||
cli_md5_init(&md5);
|
||||
cli_md5_update(&md5, key, key_len);
|
||||
cli_md5_final(digest, &md5);
|
||||
|
||||
for(i=0; i<U->items; i++) {
|
||||
if(memcmp(digest, U->md5s[i].md5, 16)) continue;
|
||||
m = &U->md5s[i];
|
||||
break;
|
||||
}
|
||||
if(U->items && U->md5s[U->idx[*digest]].md5[0]==*digest)
|
||||
for(m=&U->md5s[U->idx[*digest]]; m; m=m->next)
|
||||
if(!memcmp(&digest[1], &m->md5[1], 15)) break;
|
||||
|
||||
if(i==U->items) {
|
||||
char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
||||
m = &U->md5s[i];
|
||||
if(!m) {
|
||||
const char HEX[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
|
||||
|
||||
m = &U->md5s[U->items];
|
||||
m->count = 0;
|
||||
|
||||
if(U->items && U->md5s[U->idx[*digest]].md5[0]==*digest)
|
||||
m->next = &U->md5s[U->idx[*digest]];
|
||||
else
|
||||
m->next = NULL;
|
||||
|
||||
U->idx[*digest]=U->items;
|
||||
|
||||
for(i = 0; i < 16; i++) {
|
||||
m->name[i*2] = HEX[digest[i]>>4 & 0xf];
|
||||
m->name[i*2+1] = HEX[digest[i] & 0xf];
|
||||
|
|
@ -186,20 +95,22 @@ uint32_t uniq_add(struct uniq *U, const char *key, uint32_t key_len, char **rhas
|
|||
}
|
||||
|
||||
uint32_t uniq_get(struct uniq *U, const char *key, uint32_t key_len, char **rhash) {
|
||||
unsigned int i;
|
||||
uint8_t digest[16];
|
||||
cli_md5_ctx md5;
|
||||
struct UNIQMD5 *m = NULL;
|
||||
|
||||
cli_md5_init(&md5);
|
||||
cli_md5_update(&md5, key, key_len);
|
||||
cli_md5_final(digest, &md5);
|
||||
|
||||
for(i=0; i<U->items; i++) {
|
||||
if(memcmp(digest, U->md5s[i].md5, 16)) continue;
|
||||
if(rhash) *rhash = U->md5s[i].name;
|
||||
return U->md5s[i].count;
|
||||
if(!U->items || U->md5s[U->idx[*digest]].md5[0]!=*digest)
|
||||
return 0;
|
||||
|
||||
for(m=&U->md5s[U->idx[*digest]]; m; m=m->next) {
|
||||
if(memcmp(&digest[1], &m->md5[1], 15)) continue;
|
||||
if(rhash) *rhash = m->name;
|
||||
return m->count;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -23,46 +23,25 @@
|
|||
#ifndef _UNIQ_H
|
||||
#define _UNIQ_H
|
||||
|
||||
#include "matcher.h"
|
||||
#include "cltypes.h"
|
||||
|
||||
#if 0
|
||||
struct UNIQCUST {
|
||||
char name[33];
|
||||
uint32_t count;
|
||||
};
|
||||
|
||||
struct UNIQMD5 {
|
||||
uint16_t md5[16];
|
||||
};
|
||||
|
||||
/* A basic storage for unique IDs */
|
||||
struct uniq {
|
||||
struct cli_matcher matcher;
|
||||
struct cli_ac_patt *patts;
|
||||
struct UNIQMD5 *md5s;
|
||||
struct UNIQCUST *custs;
|
||||
uint32_t entries;
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
struct UNIQMD5 {
|
||||
struct UNIQMD5 *next;
|
||||
uint32_t count;
|
||||
uint8_t md5[16];
|
||||
char name[33];
|
||||
};
|
||||
|
||||
struct uniq {
|
||||
uint32_t items;
|
||||
struct UNIQMD5 *md5s;
|
||||
uint32_t items;
|
||||
uint32_t idx[256];
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
struct uniq *uniq_init(uint32_t);
|
||||
void uniq_free(struct uniq *);
|
||||
uint32_t uniq_add(struct uniq *, const char *, uint32_t, char **);
|
||||
uint32_t uniq_get(struct uniq *, const char *, uint32_t, char **);
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1041,9 +1041,9 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
|
|||
DIR *dd;
|
||||
struct dirent *dent;
|
||||
struct stat statbuf;
|
||||
char *fullname, vbaname[1024];
|
||||
char *fullname, vbaname[1024], *hash;
|
||||
unsigned char *data;
|
||||
uint32_t hashcnt, hash;
|
||||
uint32_t hashcnt;
|
||||
|
||||
hashcnt = uniq_get(U, "_vba_project", 12, NULL);
|
||||
while(hashcnt--) {
|
||||
|
|
@ -1051,7 +1051,7 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
|
|||
|
||||
for(i = 0; i < vba_project->count; i++) {
|
||||
for(j = 0; j < vba_project->colls[i]; j++) {
|
||||
snprintf(vbaname, 1024, "%s/%u_%u", vba_project->dir, vba_project->name[i], j);
|
||||
snprintf(vbaname, 1024, "%s/%s_%u", vba_project->dir, vba_project->name[i], j);
|
||||
vbaname[sizeof(vbaname)-1] = '\0';
|
||||
fd = open(vbaname, O_RDONLY|O_BINARY);
|
||||
if(fd == -1) continue;
|
||||
|
|
@ -1077,7 +1077,7 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
|
|||
|
||||
if((hashcnt = uniq_get(U, "powerpoint document", 19, &hash))) {
|
||||
while(hashcnt--) {
|
||||
snprintf(vbaname, 1024, "%s/%u_%u", dirname, hash, hashcnt);
|
||||
snprintf(vbaname, 1024, "%s/%s_%u", dirname, hash, hashcnt);
|
||||
vbaname[sizeof(vbaname)-1] = '\0';
|
||||
fd = open(vbaname, O_RDONLY|O_BINARY);
|
||||
if (fd == -1) continue;
|
||||
|
|
@ -1093,7 +1093,7 @@ int sigtool_vba_scandir (const char *dirname, int hex_output, struct uniq *U)
|
|||
|
||||
if ((hashcnt = uniq_get(U, "worddocument", 12, &hash))) {
|
||||
while(hashcnt--) {
|
||||
snprintf(vbaname, sizeof(vbaname), "%s/%u_%u", dirname, hash, hashcnt);
|
||||
snprintf(vbaname, sizeof(vbaname), "%s/%s_%u", dirname, hash, hashcnt);
|
||||
vbaname[sizeof(vbaname)-1] = '\0';
|
||||
fd = open(vbaname, O_RDONLY|O_BINARY);
|
||||
if (fd == -1) continue;
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
#ifndef __VBA_H
|
||||
#define __VBA_H
|
||||
|
||||
#include "libclamav/hashtab.h"
|
||||
#include "libclamav/uniq.h"
|
||||
int sigtool_vba_scandir(const char *dirname, int hex_output, struct uniq *U);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue