From 3c29ca0b103f5579f509ce46157a97c6b6a9be8d Mon Sep 17 00:00:00 2001 From: Shawn Webb Date: Tue, 21 Jan 2014 11:30:27 -0500 Subject: [PATCH] Phase 1 of reporting hashes of PE sections Conflicts: libclamav/stats.h --- libclamav/clamav.h | 16 +++-- libclamav/json.c | 25 -------- libclamav/matcher.c | 10 ++- libclamav/others.h | 2 +- libclamav/pe.c | 44 ++++++++++--- libclamav/pe.h | 6 +- libclamav/stats.c | 149 ++++++++++++++++++++++++++++++++++++-------- libclamav/stats.h | 6 +- sigtool/sigtool.c | 2 +- 9 files changed, 186 insertions(+), 74 deletions(-) diff --git a/libclamav/clamav.h b/libclamav/clamav.h index 61d24b920..e3c98cc7b 100644 --- a/libclamav/clamav.h +++ b/libclamav/clamav.h @@ -220,7 +220,15 @@ enum bytecode_mode { CL_BYTECODE_MODE_OFF /* for query only, not settable */ }; -typedef enum cli_intel_sample_type { WHOLEFILE = 0, PESECTION = 1 } cli_intel_sample_type_t; /* For stats/intel gathering */ +struct cli_section_hash { + unsigned char md5[16]; + size_t len; +}; + +typedef struct cli_stats_sections { + size_t nsections; + struct cli_section_hash *sections; +} stats_section_t; extern int cl_engine_set_num(struct cl_engine *engine, enum cl_engine_field field, long long num); @@ -360,13 +368,13 @@ extern void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback /* Statistics/intelligence gathering callbacks */ extern void cl_engine_set_stats_set_cbdata(struct cl_engine *engine, void *cbdata); -typedef void (*clcb_stats_add_sample)(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata); +typedef void (*clcb_stats_add_sample)(const char *virname, const unsigned char *md5, size_t size, stats_section_t *sections, void *cbdata); extern void cl_engine_set_clcb_stats_add_sample(struct cl_engine *engine, clcb_stats_add_sample callback); -typedef void (*clcb_stats_remove_sample)(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata); +typedef 
void (*clcb_stats_remove_sample)(const char *virname, const unsigned char *md5, size_t size, void *cbdata); extern void cl_engine_set_clcb_stats_remove_sample(struct cl_engine *engine, clcb_stats_remove_sample callback); -typedef void (*clcb_stats_decrement_count)(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata); +typedef void (*clcb_stats_decrement_count)(const char *virname, const unsigned char *md5, size_t size, void *cbdata); extern void cl_engine_set_clcb_stats_decrement_count(struct cl_engine *engine, clcb_stats_decrement_count callback); typedef void (*clcb_stats_submit)(struct cl_engine *engine, void *cbdata); diff --git a/libclamav/json.c b/libclamav/json.c index 42d5a5ed0..6f3d0b2a4 100644 --- a/libclamav/json.c +++ b/libclamav/json.c @@ -30,18 +30,6 @@ char *hex_encode(char *buf, char *data, size_t len) return p; } -const char *get_sample_type(cli_intel_sample_type_t type) -{ - switch (type) { - case WHOLEFILE: - return "whole-file"; - case PESECTION: - return "PE section"; - default: - return NULL; - } -} - char *ensure_bufsize(char *buf, size_t *oldsize, size_t used, size_t additional) { char *p=buf; @@ -106,19 +94,6 @@ char *export_stats_to_json(struct cl_engine *engine, cli_intel_t *intel) snprintf(buf+curused, bufsz-curused, "\t\t\t\"hash\": \"%s\",\n", md5); curused += strlen(buf+curused); - type = get_sample_type(sample->type); - if (!(type)) { - free(buf); - return NULL; - } - - buf = ensure_bufsize(buf, &bufsz, curused, sizeof("type") + strlen(type) + 15); - if (!(buf)) - return NULL; - - snprintf(buf+curused, bufsz-curused, "\t\t\t\"type\": \"%s\",\n", type); - curused += strlen(buf+curused); - /* Reuse the md5 variable for serializing the number of hits */ snprintf(md5, sizeof(md5), "%u", sample->hits); diff --git a/libclamav/matcher.c b/libclamav/matcher.c index dbf09de9a..20cd72c61 100644 --- a/libclamav/matcher.c +++ b/libclamav/matcher.c @@ -428,6 +428,7 @@ int cli_checkfp(unsigned 
char *digest, size_t size, cli_ctx *ctx) uint8_t shash1[SHA1_HASH_SIZE*2+1]; uint8_t shash256[SHA256_HASH_SIZE*2+1]; int have_sha1, have_sha256, do_dsig_check = 1; + stats_section_t sections; if(cli_hm_scan(digest, size, &virname, ctx->engine->hm_fp, CLI_HASH_MD5) == CL_VIRUS) { cli_dbgmsg("cli_checkfp(md5): Found false positive detection (fp sig: %s), size: %d\n", virname, (int)size); @@ -522,8 +523,11 @@ int cli_checkfp(unsigned char *digest, size_t size, cli_ctx *ctx) } #endif - if(do_dsig_check) { - switch(cli_checkfp_pe(ctx, shash1)) { + memset(&sections, 0x00, sizeof(stats_section_t)); + if(do_dsig_check || ctx->engine->cb_stats_add_sample) { + uint32_t flags = (do_dsig_check ? CL_CHECKFP_PE_FLAG_AUTHENTICODE : 0) | (ctx->engine->cb_stats_add_sample ? CL_CHECKFP_PE_FLAG_STATS : 0); + + switch(cli_checkfp_pe(ctx, shash1, &sections, flags)) { case CL_CLEAN: cli_dbgmsg("cli_checkfp(pe): PE file whitelisted due to valid embedded digital signature\n"); return CL_CLEAN; @@ -540,7 +544,7 @@ int cli_checkfp(unsigned char *digest, size_t size, cli_ctx *ctx) ctx->engine->cb_hash(fmap_fd(*ctx->fmap), size, md5, cli_get_last_virus(ctx), ctx->cb_ctx); if (ctx->engine->cb_stats_add_sample) - ctx->engine->cb_stats_add_sample(cli_get_last_virus(ctx), digest, size, WHOLEFILE, ctx->engine->stats_data); + ctx->engine->cb_stats_add_sample(cli_get_last_virus(ctx), digest, size, &sections, ctx->engine->stats_data); return CL_VIRUS; } diff --git a/libclamav/others.h b/libclamav/others.h index 7b16ce25f..53446d9bf 100644 --- a/libclamav/others.h +++ b/libclamav/others.h @@ -149,9 +149,9 @@ typedef struct cli_ctx_tag { typedef struct cli_flagged_sample { char **virus_name; char md5[16]; - cli_intel_sample_type_t type; size_t size; /* A size of zero means size is unavailable (why would this ever happen?)
*/ uint32_t hits; + stats_section_t *sections; struct cli_flagged_sample *prev; struct cli_flagged_sample *next; diff --git a/libclamav/pe.c b/libclamav/pe.c index 66e306c64..6852e02b1 100644 --- a/libclamav/pe.c +++ b/libclamav/pe.c @@ -60,6 +60,7 @@ #include "ishield.h" #include "asn1.h" #include "sha1.h" +#include "libclamav/md5.h" #define DCONF ctx->dconf->pe @@ -2796,7 +2797,7 @@ static int sort_sects(const void *first, const void *second) { return (a->raw - b->raw); } -int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { +int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1, stats_section_t *hashes, uint32_t flags) { uint16_t e_magic; /* DOS signature ("MZ") */ uint16_t nsections; uint32_t e_lfanew; /* address of new exe header */ @@ -2814,6 +2815,14 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { struct pe_image_data_dir *dirs; fmap_t *map = *ctx->fmap; SHA1Context sha1; + cli_md5_ctx md5ctx; + + if (flags & CL_CHECKFP_PE_FLAG_STATS) + if (!(hashes)) + return CL_EFORMAT; + + if (flags == CL_CHECKFP_PE_FLAG_NONE) + return 0; if(!(DCONF & PE_CONF_CATALOG)) return CL_EFORMAT; @@ -2903,6 +2912,15 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { hdr_size = PESALIGN(hdr_size, falign); /* Aligned headers virtual size */ + if (flags & CL_CHECKFP_PE_FLAG_STATS) { + hashes->nsections = nsections; + hashes->sections = cli_calloc(nsections, sizeof(struct cli_section_hash)); + if (!(hashes->sections)) { + free(exe_sections); + return CL_EMEM; + } + } + for(i = 0; i < nsections; i++) { exe_sections[i].rva = PEALIGN(EC32(section_hdr[i].VirtualAddress), valign); exe_sections[i].vsz = PESALIGN(EC32(section_hdr[i].VirtualSize), valign); @@ -2928,9 +2946,10 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { cli_qsort(exe_sections, nsections, sizeof(*exe_sections), sort_sects); - SHA1Init(&sha1); + if (flags & CL_CHECKFP_PE_FLAG_AUTHENTICODE) + SHA1Init(&sha1); -#define hash_chunk(where, size) \ +#define hash_chunk(where, size, isStatAble, section) \ do { 
\ const uint8_t *hptr; \ if(!(size)) break; \ @@ -2938,13 +2957,20 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { free(exe_sections); \ return CL_EFORMAT; \ } \ - SHA1Update(&sha1, hptr, size); \ + if (flags & CL_CHECKFP_PE_FLAG_AUTHENTICODE) \ + SHA1Update(&sha1, hptr, size); \ + if (isStatAble && flags & CL_CHECKFP_PE_FLAG_STATS) { \ + cli_md5_init(&md5ctx); \ + cli_md5_update(&md5ctx, hptr, size); \ + cli_md5_final(hashes->sections[section].md5, &md5ctx); \ + hashes->sections[section].len = size; \ + } \ } while(0) /* MZ to checksum */ at = 0; hlen = e_lfanew + sizeof(struct pe_image_file_hdr) + (pe_plus ? offsetof(struct pe_image_optional_hdr64, CheckSum) : offsetof(struct pe_image_optional_hdr32, CheckSum)); - hash_chunk(0, hlen); + hash_chunk(0, hlen, 0, 0); at = hlen + 4; /* Checksum to security */ @@ -2952,7 +2978,7 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { hlen = offsetof(struct pe_image_optional_hdr64, DataDirectory[4]) - offsetof(struct pe_image_optional_hdr64, CheckSum) - 4; else hlen = offsetof(struct pe_image_optional_hdr32, DataDirectory[4]) - offsetof(struct pe_image_optional_hdr32, CheckSum) - 4; - hash_chunk(at, hlen); + hash_chunk(at, hlen, 0, 0); at += hlen + 8; if(at > hdr_size) { @@ -2962,7 +2988,7 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { /* Security to End of header */ hlen = hdr_size - at; - hash_chunk(at, hlen); + hash_chunk(at, hlen, 0, 0); /* Sections */ at = hdr_size; @@ -2970,7 +2996,7 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { if(!exe_sections[i].rsz) continue; - hash_chunk(exe_sections[i].raw, exe_sections[i].rsz); + hash_chunk(exe_sections[i].raw, exe_sections[i].rsz, 1, i); at += exe_sections[i].rsz; } @@ -2982,7 +3008,7 @@ int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1) { } hlen -= dirs[4].Size; - hash_chunk(at, hlen); + hash_chunk(at, hlen, 0, 0); at += hlen; } free(exe_sections); diff --git a/libclamav/pe.h b/libclamav/pe.h index 5b87ba5f5..cd10ee698 100644 --- 
a/libclamav/pe.h +++ b/libclamav/pe.h @@ -160,8 +160,12 @@ struct cli_pe_hook_data { int cli_scanpe(cli_ctx *ctx); +#define CL_CHECKFP_PE_FLAG_NONE 0x00000000 +#define CL_CHECKFP_PE_FLAG_STATS 0x00000001 +#define CL_CHECKFP_PE_FLAG_AUTHENTICODE 0x00000002 + int cli_peheader(fmap_t *map, struct cli_exe_info *peinfo); -int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1); +int cli_checkfp_pe(cli_ctx *ctx, uint8_t *authsha1, stats_section_t *hashes, uint32_t flags); uint32_t cli_rawaddr(uint32_t, const struct cli_exe_section *, uint16_t, unsigned int *, size_t, uint32_t); void findres(uint32_t, uint32_t, uint32_t, fmap_t *map, struct cli_exe_section *, uint16_t, uint32_t, int (*)(void *, uint32_t, uint32_t, uint32_t, uint32_t), void *); diff --git a/libclamav/stats.c b/libclamav/stats.c index cce719b5e..0f9164f1f 100644 --- a/libclamav/stats.c +++ b/libclamav/stats.c @@ -31,10 +31,84 @@ #include "libclamav/hostid.h" #include "libclamav/www.h" -static cli_flagged_sample_t *find_sample(cli_intel_t *intel, const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type); +#define DEBUG_STATS 1 + +static cli_flagged_sample_t *find_sample(cli_intel_t *intel, const char *virname, const unsigned char *md5, size_t size, stats_section_t *sections); void free_sample(cli_flagged_sample_t *sample); -void clamav_stats_add_sample(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata) +#if DEBUG_STATS +char *get_hash(unsigned char *md5) +{ + char *hash; + int i; + + hash = calloc(1, 33); + if (!(hash)) + return NULL; + + for (i=0; i<16; i++) + sprintf(hash+(i*2), "%02x", md5[i]); + + return hash; +} + +char *get_sample_names(char **names) +{ + char *ret; + size_t n, i, sz; + + sz = 0; + for (n=0; names[n] != NULL; n++) + sz += strlen(names[n]); + + ret = calloc(1, sz + n + 1); + if (!(ret)) + return NULL; + + for (i=0; names[i] != NULL; i++) + sprintf(ret+strlen(ret), "%s%s", (i==0) ? 
"" : " ", names[i]); + + return ret; +} + +void print_sample(cli_flagged_sample_t *sample) +{ + char *hash, *names; + size_t i; + + if (!(sample)) + return; + + hash = get_hash(sample->md5); + if (!(hash)) + return; + + cli_warnmsg("Sample[%s]:\n", hash); + cli_warnmsg(" * Size: %zu\n", sample->size); + cli_warnmsg(" * Hits: %u\n", sample->hits); + + free(hash); + + names = get_sample_names(sample->virus_name); + if ((names)) + cli_warnmsg(" * Names: %s\n", names); + + if (sample->sections && sample->sections->nsections) { + for (i=0; i < sample->sections->nsections; i++) { + hash = get_hash(sample->sections->sections[i].md5); + if ((hash)) { + cli_warnmsg(" * Section[%zu] (%zu): %s\n", i, sample->sections->sections[i].len, hash); + free(hash); + } + } + } + + if ((names)) + free(names); +} +#endif + +void clamav_stats_add_sample(const char *virname, const unsigned char *md5, size_t size, stats_section_t *sections, void *cbdata) { cli_intel_t *intel; cli_flagged_sample_t *sample; @@ -81,7 +155,7 @@ void clamav_stats_add_sample(const char *virname, const unsigned char *md5, size } #endif - sample = find_sample(intel, virname, md5, size, type); + sample = find_sample(intel, virname, md5, size, sections); if (!(sample)) { if (!(intel->samples)) { sample = intel->samples = calloc(1, sizeof(cli_flagged_sample_t)); @@ -136,9 +210,15 @@ void clamav_stats_add_sample(const char *virname, const unsigned char *md5, size sample->virus_name[i+1] = NULL; memcpy(sample->md5, md5, sizeof(sample->md5)); - sample->type = type; sample->size = size; intel->nsamples++; + + if (sections && sections->nsections && !(sample->sections)) { + /* Copy the section data that has already been allocated. We don't care if calloc fails; just skip copying if it does. */ + sample->sections = calloc(1, sizeof(stats_section_t)); + if ((sample->sections)) + memcpy(sample->sections, sections, sizeof(stats_section_t)); + } } cli_warnmsg("Added %s to the stats cache\n", (virname != NULL) ? 
virname: "[unknown]"); @@ -249,6 +329,9 @@ void clamav_stats_submit(struct cl_engine *engine, void *cbdata) #endif for (sample=myintel.samples; sample != NULL; sample = next) { +#if DEBUG_STATS + print_sample(sample); +#endif next = sample->next; free_sample(sample); @@ -261,7 +344,7 @@ void clamav_stats_submit(struct cl_engine *engine, void *cbdata) } } -void clamav_stats_remove_sample(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata) +void clamav_stats_remove_sample(const char *virname, const unsigned char *md5, size_t size, void *cbdata) { cli_intel_t *intel; cli_flagged_sample_t *sample; @@ -279,19 +362,17 @@ void clamav_stats_remove_sample(const char *virname, const unsigned char *md5, s } #endif - sample = find_sample(intel, virname, md5, size, type); - if (!(sample)) - return; + while ((sample = find_sample(intel, virname, md5, size, NULL))) { + if (sample->prev) + sample->prev->next = sample->next; + if (sample->next) + sample->next->prev = sample->prev; + if (sample == intel->samples) + intel->samples = sample->next; - if (sample->prev) - sample->prev->next = sample->next; - if (sample->next) - sample->next->prev = sample; - if (sample == intel->samples) - intel->samples = sample->next; - - free_sample(sample); - intel->nsamples--; + free_sample(sample); + intel->nsamples--; + } #ifdef CL_THREAD_SAFE err = pthread_mutex_unlock(&(intel->mutex)); @@ -301,7 +382,7 @@ void clamav_stats_remove_sample(const char *virname, const unsigned char *md5, s #endif } -void clamav_stats_decrement_count(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata) +void clamav_stats_decrement_count(const char *virname, const unsigned char *md5, size_t size, void *cbdata) { cli_intel_t *intel; cli_flagged_sample_t *sample; @@ -319,15 +400,15 @@ void clamav_stats_decrement_count(const char *virname, const unsigned char *md5, } #endif - sample = find_sample(intel, virname, md5, 
size, type); + sample = find_sample(intel, virname, md5, size, NULL); if (!(sample)) return; if (sample->hits == 1) { if ((intel->engine->cb_stats_remove_sample)) - intel->engine->cb_stats_remove_sample(virname, md5, size, type, intel); + intel->engine->cb_stats_remove_sample(virname, md5, size, intel); else - clamav_stats_remove_sample(virname, md5, size, type, intel); + clamav_stats_remove_sample(virname, md5, size, intel); return; } @@ -444,14 +525,13 @@ char *clamav_stats_get_hostid(void *cbdata) } #endif -static cli_flagged_sample_t *find_sample(cli_intel_t *intel, const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type) +static cli_flagged_sample_t *find_sample(cli_intel_t *intel, const char *virname, const unsigned char *md5, size_t size, stats_section_t *sections) { cli_flagged_sample_t *sample; size_t i; for (sample = intel->samples; sample != NULL; sample = sample->next) { - if (sample->type != type) - continue; + int foundSections = 0; if (sample->size != size) continue; @@ -462,9 +542,24 @@ static cli_flagged_sample_t *find_sample(cli_intel_t *intel, const char *virname if (!(virname)) return sample; - for (i=0; sample->virus_name[i] != NULL; i++) - if (!strcmp(sample->virus_name[i], virname)) - return sample; + if ((sections) && (sample->sections)) { + if (sections->nsections == sample->sections->nsections) { + for (i=0; i < sections->nsections; i++) + if (sections->sections[i].len == sample->sections->sections[i].len) + if (memcmp(sections->sections[i].md5, sample->sections->sections[i].md5, sizeof(stats_section_t))) + break; + + if (i == sections->nsections) + foundSections = 1; + } + } else { + foundSections = 1; + } + + if (foundSections) + for (i=0; sample->virus_name[i] != NULL; i++) + if (!strcmp(sample->virus_name[i], virname)) + return sample; } return NULL; diff --git a/libclamav/stats.h b/libclamav/stats.h index b4991bc47..75e00ffad 100644 --- a/libclamav/stats.h +++ b/libclamav/stats.h @@ -4,11 +4,11 @@ 
#define STATS_HOST "stats.clamav.dev" /* Change this before release! */ #define STATS_PORT "8080" -void clamav_stats_add_sample(const char *virname, const unsigned char *md5, uint64_t size, cli_intel_sample_type_t type, void *cbdata); +void clamav_stats_add_sample(const char *virname, const unsigned char *md5, size_t size, stats_section_t *sections, void *cbdata); void clamav_stats_submit(struct cl_engine *engine, void *cbdata); void clamav_stats_flush(struct cl_engine *engine, void *cbdata); -void clamav_stats_remove_sample(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata); -void clamav_stats_decrement_count(const char *virname, const unsigned char *md5, size_t size, cli_intel_sample_type_t type, void *cbdata); +void clamav_stats_remove_sample(const char *virname, const unsigned char *md5, size_t size, void *cbdata); +void clamav_stats_decrement_count(const char *virname, const unsigned char *md5, size_t size, void *cbdata); size_t clamav_stats_get_num(void *cbdata); size_t clamav_stats_get_size(void *cbdata); char *clamav_stats_get_hostid(void *cbdata); diff --git a/sigtool/sigtool.c b/sigtool/sigtool.c index 8d06d1e6e..735458ede 100644 --- a/sigtool/sigtool.c +++ b/sigtool/sigtool.c @@ -2873,7 +2873,7 @@ static int dumpcerts(const struct optstruct *opts) SHA1Update(&sha1, fmptr, sb.st_size); SHA1Final(&sha1, shash1); - ret = cli_checkfp_pe(&ctx, shash1); + ret = cli_checkfp_pe(&ctx, shash1, NULL, CL_CHECKFP_PE_FLAG_AUTHENTICODE); switch(ret) { case CL_CLEAN: