initial support for new signature format

git-svn: trunk@860
This commit is contained in:
Tomasz Kojm 2004-09-14 01:33:32 +00:00
parent 1f82a167f2
commit b68d11d251
12 changed files with 193 additions and 38 deletions

View file

@ -1,3 +1,7 @@
Tue Sep 14 03:30:12 CEST 2004 (tk)
----------------------------------
* libclamav: initial support for new signature format
Mon Sep 13 21:57:12 CEST 2004 (tk)
----------------------------------
* libclamav/matcher-bm.c: minor optimization

View file

@ -89,17 +89,20 @@ extern "C"
struct cli_bm_patt {
char *pattern;
char *virname;
int length;
char *pattern, *virname, *offset;
const char *viralias;
unsigned int length;
unsigned short target;
struct cli_bm_patt *next;
};
struct cli_ac_patt {
short int *pattern;
unsigned int length, mindist, maxdist;
char *virname;
unsigned short int sigid, parts, partno, type, alt, *altn;
char *virname, *offset;
const char *viralias;
unsigned short int sigid, parts, partno, alt, *altn;
unsigned short type, target;
char **altc;
struct cli_ac_patt *next;
};

View file

@ -176,7 +176,7 @@ int cli_addtypesigs(struct cl_node *root)
int i, ret;
for(i = 0; cli_smagic[i].sig; i++) {
if((ret = cli_parse_add(root, cli_smagic[i].descr, cli_smagic[i].sig, cli_smagic[i].type))) {
if((ret = cli_parse_add(root, cli_smagic[i].descr, cli_smagic[i].sig, cli_smagic[i].type, NULL, 0))) {
cli_errmsg("cli_addtypesigs(): Problem adding signature for %s\n", cli_smagic[i].descr);
return ret;
}

View file

@ -32,6 +32,7 @@
#include "clamav.h"
#include "others.h"
#include "matcher.h"
#include "matcher-ac.h"
#include "unrarlib.h"
#include "defaults.h"
@ -195,6 +196,8 @@ static void cli_freepatt(struct cli_ac_patt *list)
while(handler) {
free(handler->pattern);
free(handler->virname);
if(handler->offset)
free(handler->offset);
if(handler->alt) {
free(handler->altn);
for(i = 0; i < handler->alt; i++)
@ -261,7 +264,7 @@ static int inline cli_findpos(const char *buffer, int offset, int length, const
return 1;
}
int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff)
int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, struct cli_voffset *voffset)
{
struct cli_ac_node *current;
struct cli_ac_patt *pt;
@ -305,7 +308,7 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
if(++partcnt[pt->sigid] == pt->parts) { /* the last one */
if(pt->type) {
if(typerec) {
if(otfrec) {
if(pt->type > type) {
cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
type = pt->type;
@ -323,7 +326,7 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
} else { /* old type signature */
if(pt->type) {
if(typerec) {
if(otfrec) {
if(pt->type > type) {
cli_dbgmsg("Matched signature for file type: %s\n", pt->virname);
@ -346,5 +349,5 @@ int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virnam
}
}
return typerec ? type : CL_CLEAN;
return otfrec ? type : CL_CLEAN;
}

View file

@ -20,9 +20,10 @@
#define __MATCHER_AC_H
#include "clamav.h"
#include "matcher.h"
int cli_ac_addpatt(struct cl_node *root, struct cli_ac_patt *pattern);
int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, int typerec, unsigned long int offset, unsigned long int *partoff);
int cli_ac_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, int *partcnt, short otfrec, unsigned long int offset, unsigned long int *partoff, struct cli_voffset *voffset);
int cli_ac_buildtrie(struct cl_node *root);
void cli_ac_free(struct cl_node *root);

View file

@ -20,6 +20,7 @@
#include "memory.h"
#include "others.h"
#include "cltypes.h"
#include "matcher.h"
#define BM_MIN_LENGTH 10
#define BM_TEST_OFFSET 5
@ -108,6 +109,8 @@ void cli_bm_free(struct cl_node *root)
b1 = b1->next;
if(b2->virname)
free(b2->virname);
if(b2->offset)
free(b2->offset);
if(b2->pattern)
free(b2->pattern);
free(b2);
@ -117,7 +120,7 @@ void cli_bm_free(struct cl_node *root)
}
}
int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, unsigned long int offset, struct cli_voffset *voffset)
{
int i, j, shift, off, found = 0;
uint16_t idx;
@ -163,6 +166,12 @@ int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virnam
}
if(found && p->length == j) {
if(voffset) {
voffset->offstr = p->offset;
voffset->fileoff = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
voffset->target = p->target;
}
if(virname)
*virname = p->virname;

View file

@ -20,10 +20,11 @@
#define __MATCHER_BM_H
#include "clamav.h"
#include "matcher.h"
int cli_bm_addpatt(struct cl_node *root, struct cli_bm_patt *pattern);
int cli_bm_init(struct cl_node *root);
int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root);
int cli_bm_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root, unsigned long int offset, struct cli_voffset *voffset);
void cli_bm_free(struct cl_node *root);
#endif

View file

@ -28,9 +28,13 @@
#include "matcher-bm.h"
#include "md5.h"
#include "filetypes.h"
#include "matcher.h"
#define MD5_BLOCKSIZE 4096
/* Number of entries in targettab; a signature target id must be below this. */
#define TARGET_TABLE_SIZE 5

/*
 * Lookup table indexed by a signature's target id, giving the file type
 * that the signature is expected to match.  Slot 0 is a placeholder:
 * cli_scandesc() only consults the table when the target id is non-zero.
 */
static int targettab[TARGET_TABLE_SIZE] = {
    0,              /* target 0 */
    CL_TYPE_MSEXE,  /* target 1 */
    CL_TYPE_MSOLE2, /* target 2 */
    CL_TYPE_HTML,   /* target 3 */
    CL_TYPE_MAIL    /* target 4 */
};
int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, const struct cl_node *root)
{
@ -49,8 +53,8 @@ int cl_scanbuff(const char *buffer, unsigned int length, const char **virname, c
return CL_EMEM;
}
if((ret = cli_bm_scanbuff(buffer, length, virname, root)) != CL_VIRUS)
ret = cli_ac_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff);
if((ret = cli_bm_scanbuff(buffer, length, virname, root, 0, NULL)) != CL_VIRUS)
ret = cli_ac_scanbuff(buffer, length, virname, root, partcnt, 0, 0, partoff, NULL);
free(partcnt);
free(partoff);
@ -75,7 +79,7 @@ static struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl
return NULL;
}
int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, int typerec)
int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, short otfrec, unsigned short ftype)
{
char *buffer, *buff, *endbl, *pt;
int bytes, buffsize, length, ret, *partcnt, type = CL_CLEAN;
@ -83,6 +87,7 @@ int cli_scandesc(int desc, const char **virname, long int *scanned, const struct
struct MD5Context ctx;
unsigned char digest[16];
struct cli_md5_node *md5_node;
struct cli_voffset voffset;
if(!root) {
@ -129,14 +134,31 @@ int cli_scandesc(int desc, const char **virname, long int *scanned, const struct
if(bytes < SCANBUFF)
length -= SCANBUFF - bytes;
if(cli_bm_scanbuff(pt, length, virname, root) == CL_VIRUS ||
(ret = cli_ac_scanbuff(pt, length, virname, root, partcnt, typerec, offset, partoff)) == CL_VIRUS) {
if(cli_bm_scanbuff(pt, length, virname, root, offset, &voffset) == CL_VIRUS ||
(ret = cli_ac_scanbuff(pt, length, virname, root, partcnt, otfrec, offset, partoff, &voffset)) == CL_VIRUS) {
free(buffer);
free(partcnt);
free(partoff);
if(voffset.target) {
if(voffset.target >= TARGET_TABLE_SIZE) {
cli_errmsg("Bad target (%d) in signature for %s\n", voffset.target, virname);
} else if(ftype && ftype != CL_TYPE_UNKNOWN_TEXT) {
if(targettab[voffset.target] != ftype) {
cli_dbgmsg("Expected target type (%d) for %s != %d\n", voffset.target, virname, ftype);
return CL_CLEAN;
}
} else if(type) {
if(targettab[voffset.target] != type) {
cli_dbgmsg("Expected target type (%d) for %s != %d\n", voffset.target, virname, type);
return CL_CLEAN;
}
}
}
return CL_VIRUS;
} else if(typerec && ret >= CL_TYPENO) {
} else if(otfrec && ret >= CL_TYPENO) {
if(ret >= type)
type = ret;
}
@ -177,7 +199,7 @@ int cli_scandesc(int desc, const char **virname, long int *scanned, const struct
}
}
return typerec ? type : CL_CLEAN;
return otfrec ? type : CL_CLEAN;
}
int cl_build(struct cl_node *root)

View file

@ -21,6 +21,12 @@
#include "clamav.h"
int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, int typerec);
int cli_scandesc(int desc, const char **virname, long int *scanned, const struct cl_node *root, short otfrec, unsigned short ftype);
/*
 * Details about a matched signature, handed back by the pattern matchers
 * through an optional output pointer (cli_bm_scanbuff() fills it in only
 * when the pointer is non-NULL).
 */
struct cli_voffset {
    /* The matched pattern's offset string; the pointer is copied, the
     * string itself is not duplicated. */
    const char *offstr;

    /* Position computed for the match from the caller-supplied scan offset
     * and the matcher's index into the buffer. */
    unsigned long fileoff;

    /* Target id of the matched pattern; cli_scandesc() skips its file-type
     * check when this is 0. */
    unsigned short target;
};
#endif

View file

@ -41,7 +41,7 @@
/* TODO: clean up the code */
static int cli_addsig(struct cl_node *root, const char *virname, const char *hexsig, int sigid, int parts, int partno, int type, unsigned int mindist, unsigned int maxdist)
static int cli_ac_addsig(struct cl_node *root, const char *virname, const char *hexsig, int sigid, int parts, int partno, unsigned short type, unsigned int mindist, unsigned int maxdist, char *offset, unsigned short target)
{
struct cli_ac_patt *new;
char *pt, *hex;
@ -57,6 +57,8 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
new->partno = partno;
new->mindist = mindist;
new->maxdist = maxdist;
new->target = target;
new->offset = offset;
if(strchr(hexsig, '(')) {
char *hexcpy, *hexnew, *start, *h, *c;
@ -223,7 +225,7 @@ static int cli_addsig(struct cl_node *root, const char *virname, const char *hex
return 0;
}
int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig, int type)
int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig, unsigned short type, char *offset, unsigned short target)
{
struct cli_bm_patt *bm_new;
char *pt, *hexcpy, *start, *n;
@ -254,7 +256,7 @@ int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig,
*pt++ = 0;
}
if((ret = cli_addsig(root, virname, start, root->ac_partsigs, parts, i, type, mindist, maxdist))) {
if((ret = cli_ac_addsig(root, virname, start, root->ac_partsigs, parts, i, type, mindist, maxdist, offset, target))) {
cli_errmsg("cli_parse_add(): Problem adding signature.\n");
error = 1;
break;
@ -281,7 +283,7 @@ int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig,
break;
}
} else {
if((n = cli_strtok(pt, 0, "-")) != NULL) {
if((n = cli_strtok(pt, 0, "-"))) {
if((mindist = atoi(n)) < 0) {
error = 1;
free(n);
@ -290,7 +292,7 @@ int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig,
free(n);
}
if((n = cli_strtok(pt, 1, "-")) != NULL) {
if((n = cli_strtok(pt, 1, "-"))) {
if((maxdist = atoi(n)) < 0) {
error = 1;
free(n);
@ -322,7 +324,7 @@ int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig,
return CL_EMALFDB;
}
if((ret = cli_addsig(root, virname, pt, root->ac_partsigs, parts, i, type, 0, 0))) {
if((ret = cli_ac_addsig(root, virname, pt, root->ac_partsigs, parts, i, type, 0, 0, offset, target))) {
cli_errmsg("cli_parse_add(): Problem adding signature.\n");
free(pt);
return ret;
@ -332,7 +334,7 @@ int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig,
}
} else if(strpbrk(hexsig, "?(") || type) {
if((ret = cli_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0))) {
if((ret = cli_ac_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0, offset, target))) {
cli_errmsg("cli_parse_add(): Problem adding signature\n");
return ret;
}
@ -368,6 +370,9 @@ int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig,
strncpy(bm_new->virname, virname, virlen);
bm_new->offset = offset;
bm_new->target = target;
if(bm_new->length > root->maxpatlen)
root->maxpatlen = bm_new->length;
@ -430,7 +435,7 @@ static int cli_loaddb(FILE *fd, struct cl_node **root, unsigned int *signo)
if(*pt == '=') continue;
if((ret = cli_parse_add(*root, start, pt, 0))) {
if((ret = cli_parse_add(*root, start, pt, 0, NULL, 0))) {
cli_errmsg("Problem parsing signature at line %d\n", line);
ret = CL_EMALFDB;
break;
@ -444,6 +449,100 @@ static int cli_loaddb(FILE *fd, struct cl_node **root, unsigned int *signo)
}
if(ret) {
cli_errmsg("Problem parsing database at line %d\n", line);
cl_free(*root);
return ret;
}
if(signo)
*signo += line;
return 0;
}
static int cli_loadndb(FILE *fd, struct cl_node **root, unsigned int *signo)
{
char buffer[FILEBUFF], *sig, *virname, *offset, *pt;
int line = 0, ret = 0;
unsigned short target;
if(!*root) {
cli_dbgmsg("Initializing main node\n");
*root = (struct cl_node *) cli_calloc(1, sizeof(struct cl_node));
if(!*root)
return CL_EMEM;
}
if(!(*root)->ac_root) {
cli_dbgmsg("Initializing trie\n");
(*root)->ac_root = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
if(!(*root)->ac_root) {
free(*root);
cli_errmsg("Can't initialise AC pattern matcher\n");
return CL_EMEM;
}
}
if(!(*root)->bm_shift) {
cli_dbgmsg("Initializing BM tables\n");
if((ret = cli_bm_init(*root))) {
cli_errmsg("Can't initialise BM pattern matcher\n");
return ret;
}
}
while(fgets(buffer, FILEBUFF, fd)) {
line++;
cli_chomp(buffer);
if(!(virname = cli_strtok(buffer, 0, ":"))) {
ret = CL_EMALFDB;
break;
}
if(!(pt = cli_strtok(buffer, 1, ":")) || !isdigit(*pt)) {
free(virname);
ret = CL_EMALFDB;
break;
}
target = (unsigned short) atoi(pt);
free(pt);
if(!(offset = cli_strtok(buffer, 2, ":"))) {
free(virname);
ret = CL_EMALFDB;
break;
}
if(!(sig = cli_strtok(buffer, 3, ":"))) {
free(virname);
free(offset);
ret = CL_EMALFDB;
break;
}
if((ret = cli_parse_add(*root, virname, sig, 0, offset, target))) {
cli_errmsg("Problem parsing signature at line %d\n", line);
free(virname);
free(offset);
free(sig);
ret = CL_EMALFDB;
break;
}
free(virname);
free(sig);
}
if(!line) {
cli_errmsg("Empty database file\n");
cl_free(*root);
return CL_EMALFDB;
}
if(ret) {
cli_errmsg("Problem parsing database at line %d\n", line);
cl_free(*root);
return ret;
}
@ -531,6 +630,7 @@ static int cli_loadhdb(FILE *fd, struct cl_node **root, unsigned int *signo)
}
if(ret) {
cli_errmsg("Problem parsing database at line %d\n", line);
cl_free(*root);
return ret;
}
@ -563,6 +663,9 @@ int cl_loaddb(const char *filename, struct cl_node **root, unsigned int *signo)
} else if(cli_strbcasestr(filename, ".hdb")) {
ret = cli_loadhdb(fd, root, signo);
} else if(cli_strbcasestr(filename, ".ndb")) {
ret = cli_loadndb(fd, root, signo);
} else {
cli_dbgmsg("cl_loaddb: unknown extension - assuming old database format\n");
ret = cli_loaddb(fd, root, signo);
@ -600,6 +703,7 @@ int cl_loaddbdir(const char *dirname, struct cl_node **root, unsigned int *signo
cli_strbcasestr(dent->d_name, ".db2") ||
cli_strbcasestr(dent->d_name, ".db3") ||
cli_strbcasestr(dent->d_name, ".hdb") ||
cli_strbcasestr(dent->d_name, ".ndb") ||
cli_strbcasestr(dent->d_name, ".cvd"))) {
dbfile = (char *) cli_calloc(strlen(dent->d_name) + strlen(dirname) + 2, sizeof(char));
@ -663,6 +767,7 @@ int cl_statinidir(const char *dirname, struct cl_stat *dbstat)
cli_strbcasestr(dent->d_name, ".db2") ||
cli_strbcasestr(dent->d_name, ".db3") ||
cli_strbcasestr(dent->d_name, ".hdb") ||
cli_strbcasestr(dent->d_name, ".ndb") ||
cli_strbcasestr(dent->d_name, ".cvd"))) {
dbstat->no++;
@ -710,6 +815,7 @@ int cl_statchkdir(const struct cl_stat *dbstat)
cli_strbcasestr(dent->d_name, ".db2") ||
cli_strbcasestr(dent->d_name, ".db3") ||
cli_strbcasestr(dent->d_name, ".hdb") ||
cli_strbcasestr(dent->d_name, ".ndb") ||
cli_strbcasestr(dent->d_name, ".cvd"))) {
fname = cli_calloc(strlen(dbstat->dir) + strlen(dent->d_name) + 2, sizeof(char));

View file

@ -19,6 +19,6 @@
#ifndef __READDB_H
#define __READDB_H
int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig, int type);
int cli_parse_add(struct cl_node *root, const char *virname, const char *hexsig, unsigned short type, char *offset, unsigned short target);
#endif

View file

@ -134,7 +134,7 @@ static int cli_scanrar(int desc, const char **virname, long int *scanned, const
files++;
cli_dbgmsg("RAR: Encrypted files found in archive.\n");
lseek(desc, 0, SEEK_SET);
if(cli_scandesc(desc, virname, scanned, root, 0) != CL_VIRUS)
if(cli_scandesc(desc, virname, scanned, root, 0, 0) != CL_VIRUS)
*virname = "Encrypted.RAR";
ret = CL_VIRUS;
break;
@ -325,7 +325,7 @@ static int cli_scanzip(int desc, const char **virname, long int *scanned, const
files++;
cli_dbgmsg("Zip: Encrypted files found in archive.\n");
lseek(desc, 0, SEEK_SET);
if(cli_scandesc(desc, virname, scanned, root, 0) != CL_VIRUS)
if(cli_scandesc(desc, virname, scanned, root, 0, 0) != CL_VIRUS)
*virname = "Encrypted.Zip";
ret = CL_VIRUS;
break;
@ -705,7 +705,7 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
snprintf(fullname, 1024, "%s/comment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0);
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
@ -713,7 +713,7 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
snprintf(fullname, 1024, "%s/nocomment.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0);
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
}
@ -722,7 +722,7 @@ static int cli_scanhtml(int desc, const char **virname, long int *scanned, const
snprintf(fullname, 1024, "%s/script.html", tempname);
fd = open(fullname, O_RDONLY);
if (fd >= 0) {
ret = cli_scandesc(fd, virname, scanned, root, 0);
ret = cli_scandesc(fd, virname, scanned, root, 0, CL_TYPE_HTML);
close(fd);
}
}
@ -1138,8 +1138,8 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
}
if(!options) { /* raw mode (stdin, etc.) */
cli_dbgmsg("Raw mode: no support for archives.\n");
if((ret = cli_scandesc(desc, virname, scanned, root, 0) == CL_VIRUS))
cli_dbgmsg("Raw mode: No support for special files\n");
if((ret = cli_scandesc(desc, virname, scanned, root, 0, 0) == CL_VIRUS))
cli_dbgmsg("%s found in descriptor %d\n", *virname, desc);
return ret;
}
@ -1255,7 +1255,7 @@ int cli_magic_scandesc(int desc, const char **virname, long int *scanned, const
type == CL_TYPE_UNKNOWN_TEXT ? (typerec = 1) : (typerec = 0);
lseek(desc, 0, SEEK_SET);
if((nret = cli_scandesc(desc, virname, scanned, root, typerec)) == CL_VIRUS) {
if((nret = cli_scandesc(desc, virname, scanned, root, typerec, type)) == CL_VIRUS) {
cli_dbgmsg("%s found in descriptor %d.\n", *virname, desc);
return CL_VIRUS;