mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 10:23:17 +00:00
Compare commits
7 commits
e98b0075e6
...
3c3b5e3dfd
Author | SHA1 | Date | |
---|---|---|---|
![]() |
3c3b5e3dfd | ||
![]() |
0cf59435dc | ||
![]() |
a1cd8215be | ||
![]() |
00033e92b5 | ||
![]() |
b720cfaaca | ||
![]() |
389ccf2e1d | ||
![]() |
d647e607f7 |
5 changed files with 44 additions and 93 deletions
|
@ -33,7 +33,7 @@
|
|||
#define MAGIC_BUFFER_SIZE 1028
|
||||
|
||||
#define CL_TYPENO 500
|
||||
#define MAX_EMBEDDED_OBJ 10
|
||||
#define MAX_EMBEDDED_OBJ 16
|
||||
|
||||
typedef enum cli_file {
|
||||
CL_TYPE_ANY = 0,
|
||||
|
|
|
@ -2781,7 +2781,7 @@ int cli_scanpe(cli_ctx *ctx)
|
|||
|
||||
cli_exe_info_init(peinfo, 0);
|
||||
|
||||
peheader_ret = cli_peheader(map, peinfo, opts, ctx);
|
||||
peheader_ret = cli_peheader(ctx, peinfo, opts);
|
||||
|
||||
// Warn the user if PE header parsing failed - if it's a binary that runs
|
||||
// successfully on Windows, we need to relax our PE parsing standards so
|
||||
|
@ -4360,7 +4360,7 @@ int cli_scanpe(cli_ctx *ctx)
|
|||
|
||||
cl_error_t cli_pe_targetinfo(cli_ctx *ctx, struct cli_exe_info *peinfo)
|
||||
{
|
||||
return cli_peheader(ctx->fmap, peinfo, CLI_PEHEADER_OPT_EXTRACT_VINFO, NULL);
|
||||
return cli_peheader(ctx, peinfo, CLI_PEHEADER_OPT_EXTRACT_VINFO);
|
||||
}
|
||||
|
||||
/** Parse the PE header and, if successful, populate peinfo
|
||||
|
@ -4420,7 +4420,7 @@ cl_error_t cli_pe_targetinfo(cli_ctx *ctx, struct cli_exe_info *peinfo)
|
|||
*
|
||||
* TODO Same as above but with JSON creation
|
||||
*/
|
||||
cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts, cli_ctx *ctx)
|
||||
cl_error_t cli_peheader(cli_ctx *ctx, struct cli_exe_info *peinfo, uint32_t opts)
|
||||
{
|
||||
cl_error_t ret = CL_ERROR;
|
||||
|
||||
|
@ -4446,17 +4446,20 @@ cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts,
|
|||
size_t read;
|
||||
uint32_t temp;
|
||||
|
||||
fmap_t *map = NULL;
|
||||
|
||||
int toval = 0;
|
||||
struct json_object *pe_json = NULL;
|
||||
char jsonbuf[128];
|
||||
|
||||
if (ctx == NULL &&
|
||||
(opts & CLI_PEHEADER_OPT_COLLECT_JSON ||
|
||||
opts & CLI_PEHEADER_OPT_DBG_PRINT_INFO)) {
|
||||
cli_errmsg("cli_peheader: ctx can't be NULL for options specified\n");
|
||||
if (ctx == NULL) {
|
||||
cli_errmsg("cli_peheader: ctx can't be NULL\n");
|
||||
ret = CL_EARG;
|
||||
goto done;
|
||||
}
|
||||
|
||||
map = ctx->fmap;
|
||||
|
||||
if (opts & CLI_PEHEADER_OPT_COLLECT_JSON) {
|
||||
pe_json = get_pe_property(ctx);
|
||||
}
|
||||
|
@ -5469,7 +5472,7 @@ cl_error_t cli_check_auth_header(cli_ctx *ctx, struct cli_exe_info *peinfo)
|
|||
peinfo = &_peinfo;
|
||||
cli_exe_info_init(peinfo, 0);
|
||||
|
||||
if (CL_SUCCESS != cli_peheader(ctx->fmap, peinfo, CLI_PEHEADER_OPT_NONE, NULL)) {
|
||||
if (CL_SUCCESS != cli_peheader(ctx, peinfo, CLI_PEHEADER_OPT_NONE)) {
|
||||
cli_exe_info_destroy(peinfo);
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
|
@ -5741,7 +5744,7 @@ cl_error_t cli_genhash_pe(cli_ctx *ctx, unsigned int class, cli_hash_type_t type
|
|||
// if so, use that to avoid having to re-parse the header
|
||||
cli_exe_info_init(peinfo, 0);
|
||||
|
||||
if (cli_peheader(ctx->fmap, peinfo, CLI_PEHEADER_OPT_NONE, NULL) != CL_SUCCESS) {
|
||||
if (cli_peheader(ctx, peinfo, CLI_PEHEADER_OPT_NONE) != CL_SUCCESS) {
|
||||
cli_exe_info_destroy(peinfo);
|
||||
return CL_EFORMAT;
|
||||
}
|
||||
|
|
|
@ -88,7 +88,7 @@ enum {
|
|||
#define CLI_PEHEADER_OPT_REMOVE_MISSING_SECTIONS 0x10
|
||||
|
||||
cl_error_t cli_pe_targetinfo(cli_ctx *ctx, struct cli_exe_info *peinfo);
|
||||
cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts, cli_ctx *ctx);
|
||||
cl_error_t cli_peheader(cli_ctx *ctx, struct cli_exe_info *peinfo, uint32_t opts);
|
||||
|
||||
cl_error_t cli_check_auth_header(cli_ctx *ctx, struct cli_exe_info *peinfo);
|
||||
cl_error_t cli_genhash_pe(cli_ctx *ctx, unsigned int class, cli_hash_type_t type, stats_section_t *hashes);
|
||||
|
|
|
@ -1665,7 +1665,7 @@ static cl_error_t cli_ole2_tempdir_scan_vba_new(const char *dir, cli_ctx *ctx, s
|
|||
goto done;
|
||||
}
|
||||
|
||||
ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, false, NULL, AC_SCAN_VIR, NULL, "extracted-vba-project", tempfile, LAYER_ATTRIBUTES_NORMALIZED);
|
||||
ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, false, NULL, AC_SCAN_VIR, NULL, "extracted-vba-project", tempfile, LAYER_ATTRIBUTES_NONE);
|
||||
if (CL_SUCCESS != ret) {
|
||||
goto done;
|
||||
}
|
||||
|
@ -3650,7 +3650,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
// Omit OLD TAR files because it's a raw archive format that we can extract and scan manually.
|
||||
(type != CL_TYPE_OLD_TAR) &&
|
||||
// Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually.
|
||||
(type != CL_TYPE_POSIX_TAR)) {
|
||||
(type != CL_TYPE_POSIX_TAR) &&
|
||||
// Omit TNEF files because TNEF message attachments are raw / not compressed. Document and ZIP attachments would be likely to have double-extraction issues.
|
||||
(type != CL_TYPE_TNEF)) {
|
||||
/*
|
||||
* Enable file type recognition scan mode if requested, except for some problematic types (above).
|
||||
*/
|
||||
|
@ -3668,6 +3670,8 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
// In allmatch-mode, ret will never be CL_VIRUS, so ret may be used exclusively for file type detection and for terminal errors.
|
||||
// When not in allmatch-mode, it's more important to return right away if ret is CL_VIRUS, so we don't care if file type matches were found.
|
||||
if (ret >= CL_TYPENO) {
|
||||
size_t last_offset = 0;
|
||||
|
||||
// Matched 1+ file type signatures. Handle them.
|
||||
found_type = (cli_file_t)ret;
|
||||
|
||||
|
@ -3676,11 +3680,16 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
fpt = ftoffset;
|
||||
|
||||
while (fpt) {
|
||||
if (fpt->offset > 0) {
|
||||
if ((fpt->offset > 0) &&
|
||||
// Only handle each offset once to prevent duplicate processing like if two signatures are found at the same offset.
|
||||
((size_t)fpt->offset > last_offset)) {
|
||||
|
||||
bool type_has_been_handled = true;
|
||||
bool ancestor_was_embedded = false;
|
||||
size_t i;
|
||||
|
||||
last_offset = (size_t)fpt->offset;
|
||||
|
||||
/*
|
||||
* First, use "embedded type recognition" to identify a file's actual type.
|
||||
* (a.k.a. not embedded files, but file type detection corrections)
|
||||
|
@ -3871,84 +3880,10 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Only scan embedded files if we are not already in an embedded context.
|
||||
* That is, if this or a previous layer was identified with embedded file type recognition, then we do
|
||||
* not scan for embedded files again.
|
||||
*
|
||||
* This restriction will prevent detecting the same embedded content more than once when recursing with
|
||||
* embedded file type recognition deeper within the same buffer.
|
||||
*
|
||||
* This is necessary because we have no way of knowing the length of a file for many formats and cannot
|
||||
* prevent a search for embedded files from finding the same embedded content multiple times (like a LOT
|
||||
* of times).
|
||||
*
|
||||
* E.g. if the file is like this:
|
||||
*
|
||||
* [ data ] [ embedded file ] [ data ] [ embedded file ]
|
||||
*
|
||||
* The first time we do it we'll find "two" embedded files, like this:
|
||||
*
|
||||
* Emb. File #1: [ embedded file ] [ data ] [ embedded file ]
|
||||
* Emb. File #2: [ embedded file ]
|
||||
*
|
||||
* We must not scan Emb. File #1 again for embedded files, because it would double-extract Emb. File #2.
|
||||
*
|
||||
* There is a flaw in this logic, though. Suppose that we actually have:
|
||||
*
|
||||
* [ data ] [ compressed file w. recognizable magic bytes ]
|
||||
*
|
||||
* A first pass of the above will again identify "two" embedded files:
|
||||
*
|
||||
* Emb. File #1: [ compressed archive w. recognizable magic bytes ]
|
||||
* Emb. File #2: [ magic bytes ] <- Compressed data/Not real file
|
||||
*
|
||||
* In this case, the magic bytes of a contained, compressed file is somehow still identifiable despite
|
||||
* compression. The result is the Emb. File #2 will fail to be parsed and when we decompress Emb. File
|
||||
* #1, then we maybe get something like this:
|
||||
*
|
||||
* Decompressed: [ data ] [ embedded file ]
|
||||
*
|
||||
* So if this happened... then we WOULD want to scan the decompressed file for embedded files.
|
||||
* The problem is, we have no way of knowing how long embedded files are.
|
||||
* We don't know if we have:
|
||||
*
|
||||
* A. [ data ] [ embedded file ] [ data ] [ embedded file ]
|
||||
* or
|
||||
* B. [ data ] [ embedded compressed archive w. recognizable magic bytes ]
|
||||
* or
|
||||
* C. [ data ] [ embedded uncompressed archive w. multiple file entries [ file 1 ] [ file 2 ] [ file 2 ] ]
|
||||
*
|
||||
* Some ideas for a more accurate solution:
|
||||
*
|
||||
* 1. Record the offset and size of each file extracted by the parsers.
|
||||
* Then, when we do embedded file type recognition, we can check if the offset and size of the
|
||||
* embedded file matches the offset and size of a file that was extracted by a parser.
|
||||
* This falls apart a little bit for multiple layers of archives unless we also compare offsets within
|
||||
* each layer. We could do that, but it would be a lot of work. And we'd probably want to take into
|
||||
* consideration if files were decompressed or decrypted. ... I don't know a clean solution.
|
||||
*
|
||||
* 2. Have all parsers to run before embedded file type recognition and they each determine the length
|
||||
* of the file they parsed, so we can differentiate between embedded files and appended files.
|
||||
* For appended files, we would know they weren't extracted by a parser module and the parser for
|
||||
* each of those would report the length of the file it parsed so we can use that to mitigate
|
||||
* overlapping embedded file type recognition.
|
||||
* But I highly doubt all file types can be parsed to determine the correct length of the file.
|
||||
*/
|
||||
for (i = ctx->recursion_level; i > 0; i--) {
|
||||
if (ctx->recursion_stack[i].attributes & LAYER_ATTRIBUTES_EMBEDDED) {
|
||||
// Found an ancestor that was embedded.
|
||||
// Do not scan embedded files again.
|
||||
ancestor_was_embedded = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Next, check for actual embedded files.
|
||||
*/
|
||||
if ((false == ancestor_was_embedded) &&
|
||||
(false == type_has_been_handled)) {
|
||||
if (false == type_has_been_handled) {
|
||||
cli_dbgmsg("%s signature found at %u\n", cli_ftname(fpt->type), (unsigned int)fpt->offset);
|
||||
|
||||
type_has_been_handled = true;
|
||||
|
@ -4001,6 +3936,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
break;
|
||||
}
|
||||
|
||||
// Increment last_offset to ignore any file type matches that occured within this legitimate archive.
|
||||
last_offset += zip_size - 1; // Note: size is definitely > 0 because header_check succeeded.
|
||||
|
||||
nret = cli_magic_scan_nested_fmap_type(
|
||||
ctx->fmap,
|
||||
fpt->offset,
|
||||
|
@ -4023,6 +3961,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
break;
|
||||
}
|
||||
|
||||
// Increment last_offset to ignore any file type matches that occured within this legitimate archive.
|
||||
last_offset += cab_size - 1; // Note: size is definitely > 0 because header_check succeeded.
|
||||
|
||||
nret = cli_magic_scan_nested_fmap_type(
|
||||
ctx->fmap,
|
||||
fpt->offset,
|
||||
|
@ -4046,6 +3987,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
break;
|
||||
}
|
||||
|
||||
// Increment last_offset to ignore any file type matches that occured within this legitimate archive.
|
||||
last_offset += arj_size - 1; // Note: size is definitely > 0 because header_check succeeded.
|
||||
|
||||
nret = cli_magic_scan_nested_fmap_type(
|
||||
ctx->fmap,
|
||||
fpt->offset,
|
||||
|
@ -4143,10 +4087,10 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
|||
break;
|
||||
}
|
||||
|
||||
cli_exe_info_init(&peinfo, 0);
|
||||
cli_exe_info_init(&peinfo, fpt->offset);
|
||||
|
||||
// Header validity check to prevent false positives from being scanned.
|
||||
ret = cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL);
|
||||
ret = cli_peheader(ctx, &peinfo, CLI_PEHEADER_OPT_NONE);
|
||||
|
||||
// peinfo memory may have been allocated and must be freed even if it failed.
|
||||
cli_exe_info_destroy(&peinfo);
|
||||
|
|
|
@ -1310,7 +1310,8 @@ cl_error_t index_local_file_headers_within_bounds(
|
|||
index = *num_records;
|
||||
|
||||
if (start_offset > fsize || end_offset > fsize || start_offset > end_offset) {
|
||||
cli_errmsg("index_local_file_headers_within_bounds: Invalid offset arguments\n");
|
||||
cli_errmsg("index_local_file_headers_within_bounds: Invalid offset arguments: start_offset=%u, end_offset=%u, fsize=%u\n",
|
||||
start_offset, end_offset, fsize);
|
||||
status = CL_EPARSE;
|
||||
goto done;
|
||||
}
|
||||
|
@ -2075,6 +2076,9 @@ cl_error_t unzip_search(cli_ctx *ctx, struct zip_requests *requests)
|
|||
status = CL_ETIMEOUT;
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Increment to the next central file header.
|
||||
central_file_header_offset += file_record_size;
|
||||
} while ((ret == CL_SUCCESS) && (file_record_size > 0));
|
||||
} else {
|
||||
cli_dbgmsg("unzip_search: Cannot locate central directory. unzip_search failed.\n");
|
||||
|
@ -2106,7 +2110,7 @@ cl_error_t unzip_search_single(cli_ctx *ctx, const char *name, size_t nlen, uint
|
|||
|
||||
// Search for the zip file entry in the current layer.
|
||||
status = unzip_search(ctx, &requests);
|
||||
if (CL_SUCCESS == status) {
|
||||
if (CL_VIRUS == status) {
|
||||
*loff = requests.loff;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue