mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 18:33:16 +00:00
Reduce unnecessary scanning of embedded file FPs (#1571)
When embedded file type recognition finds a possible embedded file, it is being scanned as a new embedded file even if it turns out it was a false positive and parsing fails. My solution is to pre-parse the file headers as little as possible to determine if it is valid. If possible, also determine the file size based on the headers. That will make it so we don't have to scan additional data when the embedded file is not at the very end. This commit adds header checks prior to embedded ZIP, ARJ, and CAB scanning. For these types I was also able to use the header checks to determine the object size so as to prevent excessive pattern matching. TODO: Add the same for RAR, EGG, 7Z, NULSFT, AUTOIT, IShield, and PDF. This commit also removes duplicate matching for embedded MSEXE. The embedded MSEXE detection and scanning logic was accidentally creating an extra duplicate layer in between scanning and detection because of the logic within the `cli_scanembpe()` function. That function was effectively doing the header check which this commit adds for ZIP, ARJ, and CAB but minus the size check. Note: It is unfortunately not possible to get an accurate size from PE file headers. The `cli_scanembpe()` function also used to dump to a temp file for no reason since FMAPs were extended to support windows into other FMAPs. So this commit removes the intermediate layer and no longer drops a temp file for each embedded PE file. Further, this commit adds configuration and DCONF safeguards around all embedded file type scanning. Finally, this commit adds a set of tests to validate proper extraction of embedded ZIP, ARJ, CAB, and MSEXE files. CLAM-2862 Co-authored-by: TheRaynMan <draynor@sourcefire.com>
This commit is contained in:
parent
1d158c13d4
commit
a77a271fb5
28 changed files with 618 additions and 228 deletions
|
@ -133,7 +133,7 @@ static void mspack_fmap_close(struct mspack_file *file)
|
||||||
static int mspack_fmap_read(struct mspack_file *file, void *buffer, int bytes)
|
static int mspack_fmap_read(struct mspack_file *file, void *buffer, int bytes)
|
||||||
{
|
{
|
||||||
struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
|
struct mspack_handle *mspack_handle = (struct mspack_handle *)file;
|
||||||
off_t offset;
|
size_t offset;
|
||||||
size_t count;
|
size_t count;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -150,7 +150,7 @@ static int mspack_fmap_read(struct mspack_file *file, void *buffer, int bytes)
|
||||||
/* Use fmap */
|
/* Use fmap */
|
||||||
offset = mspack_handle->offset + mspack_handle->org;
|
offset = mspack_handle->offset + mspack_handle->org;
|
||||||
|
|
||||||
count = fmap_readn(mspack_handle->fmap, buffer, (size_t)offset, (size_t)bytes);
|
count = fmap_readn(mspack_handle->fmap, buffer, offset, (size_t)bytes);
|
||||||
if (count == (size_t)-1) {
|
if (count == (size_t)-1) {
|
||||||
cli_dbgmsg("%s() %d requested %d bytes, read failed (-1)\n", __func__, __LINE__, bytes);
|
cli_dbgmsg("%s() %d requested %d bytes, read failed (-1)\n", __func__, __LINE__, bytes);
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -163,7 +163,7 @@ static int mspack_fmap_read(struct mspack_file *file, void *buffer, int bytes)
|
||||||
return (int)count;
|
return (int)count;
|
||||||
} else {
|
} else {
|
||||||
/* Use file descriptor */
|
/* Use file descriptor */
|
||||||
count = fread(buffer, bytes, 1, mspack_handle->f);
|
count = fread(buffer, (size_t)bytes, 1, mspack_handle->f);
|
||||||
if (count < 1) {
|
if (count < 1) {
|
||||||
cli_dbgmsg("%s() %d requested %d bytes, read failed (%zu)\n", __func__, __LINE__, bytes, count);
|
cli_dbgmsg("%s() %d requested %d bytes, read failed (%zu)\n", __func__, __LINE__, bytes, count);
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -340,18 +340,83 @@ static struct mspack_system mspack_sys_fmap_ops = {
|
||||||
.copy = mspack_fmap_copy,
|
.copy = mspack_fmap_copy,
|
||||||
};
|
};
|
||||||
|
|
||||||
cl_error_t cli_scanmscab(cli_ctx *ctx, off_t sfx_offset)
|
cl_error_t cli_mscab_header_check(cli_ctx *ctx, size_t offset, size_t *size) /* Try to open a CAB at `offset` within ctx->fmap; on success store the cabinet length in *size. */
|
||||||
|
{
|
||||||
|
cl_error_t status = CL_EFORMAT; /* value returned unless a later step overrides it */
|
||||||
|
|
||||||
|
struct mscab_decompressor *cab_d = NULL;
|
||||||
|
struct mscabd_cabinet *cab_h = NULL;
|
||||||
|
struct mspack_name mspack_fmap = {0};
|
||||||
|
struct mspack_system_ex ops_ex = {0};
|
||||||
|
|
||||||
|
if (NULL == ctx || NULL == size) { /* both pointers are dereferenced below */
|
||||||
|
cli_dbgmsg("%s() invalid argument\n", __func__);
|
||||||
|
status = CL_EARG;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
*size = 0; /* stays 0 on every failure path below */
|
||||||
|
mspack_fmap.fmap = ctx->fmap;
|
||||||
|
|
||||||
|
if (offset > INT32_MAX) { /* reject offsets above INT32_MAX before the cast to off_t below */
|
||||||
|
cli_dbgmsg("%s() offset too large %zu\n", __func__, offset);
|
||||||
|
status = CL_EFORMAT;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
mspack_fmap.org = (off_t)offset;
|
||||||
|
|
||||||
|
ops_ex.ops = mspack_sys_fmap_ops; /* struct copy of the mspack_sys_fmap_ops callback table */
|
||||||
|
|
||||||
|
cab_d = mspack_create_cab_decompressor(&ops_ex.ops);
|
||||||
|
if (NULL == cab_d) {
|
||||||
|
cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
|
||||||
|
status = CL_EUNPACK;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
cab_h = cab_d->open(cab_d, (char *)&mspack_fmap); /* the mspack_name struct is passed in the filename argument via a cast */
|
||||||
|
if (NULL == cab_h) { /* open failed: reported as CL_EFORMAT */
|
||||||
|
cli_dbgmsg("%s() failed at %d\n", __func__, __LINE__);
|
||||||
|
status = CL_EFORMAT;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
*size = (size_t)cab_h->length; /* length field of the opened cabinet */
|
||||||
|
|
||||||
|
cli_dbgmsg("%s(): Successfully read CAB header for CAB of size %zu\n", __func__, *size);
|
||||||
|
status = CL_SUCCESS;
|
||||||
|
|
||||||
|
done: /* cleanup: close the cabinet if it was opened, then destroy the decompressor */
|
||||||
|
if (NULL != cab_d) {
|
||||||
|
if (NULL != cab_h) {
|
||||||
|
cab_d->close(cab_d, cab_h);
|
||||||
|
}
|
||||||
|
mspack_destroy_cab_decompressor(cab_d);
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_error_t cli_scanmscab(cli_ctx *ctx, size_t sfx_offset)
|
||||||
{
|
{
|
||||||
cl_error_t ret = CL_SUCCESS;
|
cl_error_t ret = CL_SUCCESS;
|
||||||
struct mscab_decompressor *cab_d = NULL;
|
struct mscab_decompressor *cab_d = NULL;
|
||||||
struct mscabd_cabinet *cab_h = NULL;
|
struct mscabd_cabinet *cab_h = NULL;
|
||||||
struct mscabd_file *cab_f = NULL;
|
struct mscabd_file *cab_f = NULL;
|
||||||
int files;
|
int files;
|
||||||
struct mspack_name mspack_fmap = {
|
struct mspack_name mspack_fmap = {0};
|
||||||
.fmap = ctx->fmap,
|
struct mspack_system_ex ops_ex = {0};
|
||||||
.org = sfx_offset,
|
|
||||||
};
|
mspack_fmap.fmap = ctx->fmap;
|
||||||
struct mspack_system_ex ops_ex;
|
|
||||||
|
if (sfx_offset > INT32_MAX) {
|
||||||
|
cli_dbgmsg("%s() offset too large %zu\n", __func__, sfx_offset);
|
||||||
|
ret = CL_EFORMAT;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
mspack_fmap.org = (off_t)sfx_offset;
|
||||||
|
|
||||||
char *tmp_fname = NULL;
|
char *tmp_fname = NULL;
|
||||||
bool tempfile_exists = false;
|
bool tempfile_exists = false;
|
||||||
|
|
|
@ -10,7 +10,31 @@
|
||||||
#ifndef __LIBMSPACK_H__
|
#ifndef __LIBMSPACK_H__
|
||||||
#define __LIBMSPACK_H__
|
#define __LIBMSPACK_H__
|
||||||
|
|
||||||
int cli_scanmscab(cli_ctx *ctx, off_t sfx_offset);
|
/**
|
||||||
int cli_scanmschm(cli_ctx *ctx);
|
* @brief Check the CAB header for validity.
|
||||||
|
*
|
||||||
|
* @param ctx Scan context; the CAB file is read from ctx->fmap.
|
||||||
|
* @param offset Offset of the start of a CAB file within the current fmap.
|
||||||
|
* @param[out] size The size of the CAB file, as read from the CAB header.
|
||||||
|
* @return cl_error_t CL_SUCCESS if the CAB header was opened successfully, or an error code on failure.
|
||||||
|
*/
|
||||||
|
cl_error_t cli_mscab_header_check(cli_ctx *ctx, size_t offset, size_t *size);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Open and extract a Microsoft CAB file, scanning each extracted file.
|
||||||
|
*
|
||||||
|
* @param ctx Scan context
|
||||||
|
* @param sfx_offset Offset of the start of a CAB file within the current fmap.
|
||||||
|
* @return cl_error_t CL_SUCCESS on success, or an error code on failure.
|
||||||
|
*/
|
||||||
|
cl_error_t cli_scanmscab(cli_ctx *ctx, size_t sfx_offset);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Open and extract a Microsoft CHM file, scanning each extracted file.
|
||||||
|
*
|
||||||
|
* @param ctx Scan context
|
||||||
|
* @return cl_error_t CL_SUCCESS on success, or an error code on failure.
|
||||||
|
*/
|
||||||
|
cl_error_t cli_scanmschm(cli_ctx *ctx);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -602,7 +602,10 @@ cl_error_t cli_check_fp(cli_ctx *ctx, const char *vname)
|
||||||
|
|
||||||
need_hash[CLI_HASH_SHA2_256] = cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA2_256, map->len) ||
|
need_hash[CLI_HASH_SHA2_256] = cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA2_256, map->len) ||
|
||||||
cli_hm_have_wild(ctx->engine->hm_fp, CLI_HASH_SHA2_256) ||
|
cli_hm_have_wild(ctx->engine->hm_fp, CLI_HASH_SHA2_256) ||
|
||||||
cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA2_256, 1);
|
cli_hm_have_size(ctx->engine->hm_fp, CLI_HASH_SHA2_256, 1) ||
|
||||||
|
// If debug logging is enabled, we want to calculate SHA256 hashes for all layers.
|
||||||
|
// Some users rely on the debug log output to create new FP signatures.
|
||||||
|
cli_debug_flag;
|
||||||
|
|
||||||
/* Set fmap to need hash later if required.
|
/* Set fmap to need hash later if required.
|
||||||
* This is an optimization so we can calculate all needed hashes in one pass. */
|
* This is an optimization so we can calculate all needed hashes in one pass. */
|
||||||
|
@ -629,13 +632,14 @@ cl_error_t cli_check_fp(cli_ctx *ctx, const char *vname)
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cli_debug_flag ||
|
||||||
|
((CLI_HASH_MD5 == hash_type) && (ctx->engine->cb_hash))) {
|
||||||
/* Convert hash to string */
|
/* Convert hash to string */
|
||||||
for (i = 0; i < hash_len; i++) {
|
for (i = 0; i < hash_len; i++) {
|
||||||
sprintf(hash_string + i * 2, "%02x", hash[i]);
|
sprintf(hash_string + i * 2, "%02x", hash[i]);
|
||||||
}
|
}
|
||||||
hash_string[hash_len * 2] = 0;
|
hash_string[hash_len * 2] = 0;
|
||||||
|
|
||||||
if (cli_debug_flag || ctx->engine->cb_hash) {
|
|
||||||
const char *name = ctx->recursion_stack[stack_index].fmap->name;
|
const char *name = ctx->recursion_stack[stack_index].fmap->name;
|
||||||
const char *type = cli_ftname(ctx->recursion_stack[stack_index].type);
|
const char *type = cli_ftname(ctx->recursion_stack[stack_index].type);
|
||||||
|
|
||||||
|
|
|
@ -360,7 +360,7 @@ cl_error_t cli_scan_fmap(cli_ctx *ctx, cli_file_t ftype, bool filetype_only, str
|
||||||
*/
|
*/
|
||||||
cl_error_t cli_exp_eval(cli_ctx *ctx, struct cli_matcher *root, struct cli_ac_data *acdata, struct cli_target_info *target_info);
|
cl_error_t cli_exp_eval(cli_ctx *ctx, struct cli_matcher *root, struct cli_ac_data *acdata, struct cli_target_info *target_info);
|
||||||
|
|
||||||
cl_error_t cli_caloff(const char *offstr, const struct cli_target_info *info, unsigned int target, uint32_t *offdata, uint32_t *offset_min, uint32_t *offset_max);
|
cl_error_t cli_caloff(const char *offstr, const struct cli_target_info *info, cli_target_t target, uint32_t *offdata, uint32_t *offset_min, uint32_t *offset_max);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Determine if an alert is a known false positive, using each fmap in the ctx->container stack to check MD5, SHA1, and SHA2-256 hashes.
|
* @brief Determine if an alert is a known false positive, using each fmap in the ctx->container stack to check MD5, SHA1, and SHA2-256 hashes.
|
||||||
|
|
|
@ -4658,10 +4658,7 @@ cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts,
|
||||||
pe_add_heuristic_property(ctx, "BadNumberOfSections");
|
pe_add_heuristic_property(ctx, "BadNumberOfSections");
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO Investigate how corrupted_input is set and whether this
|
if ((opts & CLI_PEHEADER_OPT_DBG_PRINT_INFO) && !ctx->corrupted_input) {
|
||||||
// check is needed
|
|
||||||
if (opts & CLI_PEHEADER_OPT_DBG_PRINT_INFO &&
|
|
||||||
!ctx->corrupted_input) {
|
|
||||||
if (peinfo->nsections == 0) {
|
if (peinfo->nsections == 0) {
|
||||||
cli_dbgmsg("cli_peheader: Invalid NumberOfSections (0)\n");
|
cli_dbgmsg("cli_peheader: Invalid NumberOfSections (0)\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -997,7 +997,7 @@ static cl_error_t cli_scanarj(cli_ctx *ctx)
|
||||||
do {
|
do {
|
||||||
metadata.filename = NULL;
|
metadata.filename = NULL;
|
||||||
|
|
||||||
ret = cli_unarj_prepare_file(dir, &metadata);
|
ret = cli_unarj_prepare_file(&metadata);
|
||||||
if (ret != CL_SUCCESS) {
|
if (ret != CL_SUCCESS) {
|
||||||
cli_dbgmsg("ARJ: cli_unarj_prepare_file Error: %s\n", cl_strerror(ret));
|
cli_dbgmsg("ARJ: cli_unarj_prepare_file Error: %s\n", cl_strerror(ret));
|
||||||
break;
|
break;
|
||||||
|
@ -3447,94 +3447,6 @@ static cl_error_t cli_scan_structured(cli_ctx *ctx)
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static cl_error_t cli_scanembpe(cli_ctx *ctx, off_t offset)
|
|
||||||
{
|
|
||||||
cl_error_t ret = CL_SUCCESS;
|
|
||||||
int fd;
|
|
||||||
size_t bytes;
|
|
||||||
size_t size = 0;
|
|
||||||
size_t todo;
|
|
||||||
const char *buff;
|
|
||||||
char *tmpname;
|
|
||||||
fmap_t *map = ctx->fmap;
|
|
||||||
unsigned int corrupted_input;
|
|
||||||
|
|
||||||
tmpname = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "embedded-pe");
|
|
||||||
if (!tmpname)
|
|
||||||
return CL_EMEM;
|
|
||||||
|
|
||||||
if ((fd = open(tmpname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
|
|
||||||
cli_errmsg("cli_scanembpe: Can't create file %s\n", tmpname);
|
|
||||||
free(tmpname);
|
|
||||||
return CL_ECREAT;
|
|
||||||
}
|
|
||||||
|
|
||||||
todo = map->len - offset;
|
|
||||||
while (1) {
|
|
||||||
bytes = MIN(todo, map->pgsz);
|
|
||||||
if (!bytes)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (!(buff = fmap_need_off_once(map, offset + size, bytes))) {
|
|
||||||
close(fd);
|
|
||||||
if (!ctx->engine->keeptmp) {
|
|
||||||
if (cli_unlink(tmpname)) {
|
|
||||||
free(tmpname);
|
|
||||||
return CL_EUNLINK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
free(tmpname);
|
|
||||||
return CL_EREAD;
|
|
||||||
}
|
|
||||||
size += bytes;
|
|
||||||
todo -= bytes;
|
|
||||||
|
|
||||||
if (cli_checklimits("cli_scanembpe", ctx, size, 0, 0) != CL_SUCCESS)
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (cli_writen(fd, buff, bytes) != bytes) {
|
|
||||||
cli_dbgmsg("cli_scanembpe: Can't write to temporary file\n");
|
|
||||||
close(fd);
|
|
||||||
if (!ctx->engine->keeptmp) {
|
|
||||||
if (cli_unlink(tmpname)) {
|
|
||||||
free(tmpname);
|
|
||||||
return CL_EUNLINK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
free(tmpname);
|
|
||||||
return CL_EWRITE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Setting ctx->corrupted_input will prevent the PE parser from reporting "broken executable" for unpacked/reconstructed files that may not be 100% to spec.
|
|
||||||
corrupted_input = ctx->corrupted_input;
|
|
||||||
ctx->corrupted_input = 1;
|
|
||||||
ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
||||||
ctx->corrupted_input = corrupted_input;
|
|
||||||
if (ret != CL_SUCCESS) {
|
|
||||||
close(fd);
|
|
||||||
if (!ctx->engine->keeptmp) {
|
|
||||||
if (cli_unlink(tmpname)) {
|
|
||||||
free(tmpname);
|
|
||||||
return CL_EUNLINK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
free(tmpname);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
close(fd);
|
|
||||||
if (!ctx->engine->keeptmp) {
|
|
||||||
if (cli_unlink(tmpname)) {
|
|
||||||
free(tmpname);
|
|
||||||
return CL_EUNLINK;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
free(tmpname);
|
|
||||||
|
|
||||||
return CL_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(_WIN32) || defined(C_LINUX) || defined(C_DARWIN)
|
#if defined(_WIN32) || defined(C_LINUX) || defined(C_DARWIN)
|
||||||
#define PERF_MEASURE
|
#define PERF_MEASURE
|
||||||
#endif
|
#endif
|
||||||
|
@ -3720,26 +3632,33 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
cli_file_t found_type;
|
cli_file_t found_type;
|
||||||
|
|
||||||
if ((typercg) &&
|
if ((typercg) &&
|
||||||
// We should also omit bzips, but DMG's may be detected in bzips. (type != CL_TYPE_BZ) && /* Omit BZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan! */
|
// Omit embedded files or file types already identified via this process.
|
||||||
(type != CL_TYPE_GZ) && /* Omit GZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan! */
|
(!(ctx->recursion_stack[ctx->recursion_level].attributes & LAYER_ATTRIBUTES_EMBEDDED)) &&
|
||||||
(type != CL_TYPE_CPIO_OLD) && /* Omit CPIO_OLD files because it's an image format that we can extract and scan manually. */
|
// Omit GZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan!
|
||||||
(type != CL_TYPE_ZIP) && /* Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan. */
|
(type != CL_TYPE_GZ) &&
|
||||||
(type != CL_TYPE_ZIPSFX) && /* Omit SFX archive types from being checked for embedded content. They should only be parsed for contained files. Those contained files could be EXE's with more SFX, but that's the nature of containers. */
|
// We should also omit bzips, but DMG's may be detected in bzips.
|
||||||
(type != CL_TYPE_ARJSFX) && /* " */
|
//(type != CL_TYPE_BZ) &&
|
||||||
(type != CL_TYPE_RARSFX) && /* " */
|
// Omit CPIO_OLD files because it's an image format that we can extract and scan manually.
|
||||||
(type != CL_TYPE_EGGSFX) && /* " */
|
(type != CL_TYPE_CPIO_OLD) &&
|
||||||
(type != CL_TYPE_CABSFX) && /* " */
|
// Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan.
|
||||||
(type != CL_TYPE_7ZSFX) && /* " */
|
(type != CL_TYPE_ZIP) &&
|
||||||
(type != CL_TYPE_OOXML_WORD) && /* Omit OOXML because they are ZIP-based and file-type scanning will double-extract their contents. */
|
// Omit OOXML because they are ZIP-based and file-type scanning will double-extract their contents.
|
||||||
(type != CL_TYPE_OOXML_PPT) && /* " */
|
(type != CL_TYPE_OOXML_WORD) &&
|
||||||
(type != CL_TYPE_OOXML_XL) && /* " */
|
(type != CL_TYPE_OOXML_PPT) &&
|
||||||
(type != CL_TYPE_OOXML_HWP) && /* " */
|
(type != CL_TYPE_OOXML_XL) &&
|
||||||
(type != CL_TYPE_OLD_TAR) && /* Omit OLD TAR files because it's a raw archive format that we can extract and scan manually. */
|
(type != CL_TYPE_OOXML_HWP) &&
|
||||||
(type != CL_TYPE_POSIX_TAR)) { /* Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually. */
|
// Omit OLD TAR files because it's a raw archive format that we can extract and scan manually.
|
||||||
|
(type != CL_TYPE_OLD_TAR) &&
|
||||||
|
// Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually.
|
||||||
|
(type != CL_TYPE_POSIX_TAR)) {
|
||||||
/*
|
/*
|
||||||
* Enable file type recognition scan mode if requested, except for some problematic types (above).
|
* Enable file type recognition scan mode if requested, except for some problematic types (above).
|
||||||
*/
|
*/
|
||||||
acmode |= AC_SCAN_FT;
|
acmode |= AC_SCAN_FT;
|
||||||
|
} else {
|
||||||
|
cli_dbgmsg("scanraw: embedded type recognition disabled or not applicable for type %s %s\n",
|
||||||
|
cli_ftname(type),
|
||||||
|
(ctx->recursion_stack[ctx->recursion_level].attributes & LAYER_ATTRIBUTES_EMBEDDED) ? "(embedded layer)" : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
perf_start(ctx, PERFT_RAW);
|
perf_start(ctx, PERFT_RAW);
|
||||||
|
@ -3960,8 +3879,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
* This restriction will prevent detecting the same embedded content more than once when recursing with
|
* This restriction will prevent detecting the same embedded content more than once when recursing with
|
||||||
* embedded file type recognition deeper within the same buffer.
|
* embedded file type recognition deeper within the same buffer.
|
||||||
*
|
*
|
||||||
* This is necessary because we have no way of knowing the length of a file and cannot prevent a search
|
* This is necessary because we have no way of knowing the length of a file for many formats and cannot
|
||||||
* for embedded files from finding the same embedded content multiple times (like a LOT of times).
|
* prevent a search for embedded files from finding the same embedded content multiple times (like a LOT
|
||||||
|
* of times).
|
||||||
*
|
*
|
||||||
* E.g. if the file is like this:
|
* E.g. if the file is like this:
|
||||||
*
|
*
|
||||||
|
@ -3990,7 +3910,7 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
* Decompressed: [ data ] [ embedded file ]
|
* Decompressed: [ data ] [ embedded file ]
|
||||||
*
|
*
|
||||||
* So if this happened... then we WOULD want to scan the decompressed file for embedded files.
|
* So if this happened... then we WOULD want to scan the decompressed file for embedded files.
|
||||||
* The problem is, we have way of knowing how long embedded files are.
|
* The problem is, we have no way of knowing how long embedded files are.
|
||||||
* We don't know if we have:
|
* We don't know if we have:
|
||||||
*
|
*
|
||||||
* A. [ data ] [ embedded file ] [ data ] [ embedded file ]
|
* A. [ data ] [ embedded file ] [ data ] [ embedded file ]
|
||||||
|
@ -4035,7 +3955,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
|
|
||||||
switch (fpt->type) {
|
switch (fpt->type) {
|
||||||
case CL_TYPE_RARSFX:
|
case CL_TYPE_RARSFX:
|
||||||
if (type != CL_TYPE_RAR) {
|
if ((have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) &&
|
||||||
|
(type != CL_TYPE_RAR)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
@ -4048,7 +3970,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_EGGSFX:
|
case CL_TYPE_EGGSFX:
|
||||||
if (type != CL_TYPE_EGG) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG)) &&
|
||||||
|
(type != CL_TYPE_EGG)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
@ -4061,11 +3985,26 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_ZIPSFX:
|
case CL_TYPE_ZIPSFX:
|
||||||
if (type != CL_TYPE_ZIP) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) &&
|
||||||
|
(type != CL_TYPE_ZIP) &&
|
||||||
|
/* OOXML are ZIP-based. */
|
||||||
|
(type != CL_TYPE_OOXML_WORD) &&
|
||||||
|
(type != CL_TYPE_OOXML_PPT) &&
|
||||||
|
(type != CL_TYPE_OOXML_XL) &&
|
||||||
|
(type != CL_TYPE_OOXML_HWP)) {
|
||||||
|
// Header validity check to prevent false positives from being scanned.
|
||||||
|
size_t zip_size = 0;
|
||||||
|
|
||||||
|
ret = cli_unzip_single_header_check(ctx, fpt->offset, &zip_size);
|
||||||
|
if (ret != CL_SUCCESS) {
|
||||||
|
cli_dbgmsg("ZIP single header check failed: %s (%d)\n", cl_strerror(ret), ret);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
ctx->fmap->len - fpt->offset,
|
zip_size,
|
||||||
ctx,
|
ctx,
|
||||||
CL_TYPE_ZIP,
|
CL_TYPE_ZIP,
|
||||||
NULL,
|
NULL,
|
||||||
|
@ -4074,11 +4013,20 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_CABSFX:
|
case CL_TYPE_CABSFX:
|
||||||
if (type != CL_TYPE_MSCAB) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB)) &&
|
||||||
|
(type != CL_TYPE_MSCAB)) {
|
||||||
|
// Header validity check to prevent false positives from being scanned.
|
||||||
|
size_t cab_size = 0;
|
||||||
|
ret = cli_mscab_header_check(ctx, fpt->offset, &cab_size);
|
||||||
|
if (ret != CL_SUCCESS) {
|
||||||
|
cli_dbgmsg("CAB header check failed: %s (%d)\n", cl_strerror(ret), ret);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
ctx->fmap->len - fpt->offset,
|
cab_size,
|
||||||
ctx,
|
ctx,
|
||||||
CL_TYPE_MSCAB,
|
CL_TYPE_MSCAB,
|
||||||
NULL,
|
NULL,
|
||||||
|
@ -4087,11 +4035,21 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_ARJSFX:
|
case CL_TYPE_ARJSFX:
|
||||||
if (type != CL_TYPE_ARJ) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ)) &&
|
||||||
|
(type != CL_TYPE_ARJ)) {
|
||||||
|
// Header validity check to prevent false positives from being scanned.
|
||||||
|
size_t arj_size = 0;
|
||||||
|
|
||||||
|
ret = cli_unarj_header_check(ctx, fpt->offset, &arj_size);
|
||||||
|
if (ret != CL_SUCCESS) {
|
||||||
|
cli_dbgmsg("ARJ header check failed: %s (%d)\n", cl_strerror(ret), ret);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
ctx->fmap->len - fpt->offset,
|
arj_size,
|
||||||
ctx,
|
ctx,
|
||||||
CL_TYPE_ARJ,
|
CL_TYPE_ARJ,
|
||||||
NULL,
|
NULL,
|
||||||
|
@ -4100,7 +4058,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_7ZSFX:
|
case CL_TYPE_7ZSFX:
|
||||||
if (type != CL_TYPE_7Z) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z)) &&
|
||||||
|
(type != CL_TYPE_7Z)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
@ -4113,8 +4073,10 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_NULSFT:
|
case CL_TYPE_NULSFT:
|
||||||
if (type == CL_TYPE_MSEXE && fpt->offset > 4) {
|
|
||||||
// Note: CL_TYPE_NULSFT is special, because the file actually starts 4 bytes before the start of the signature match
|
// Note: CL_TYPE_NULSFT is special, because the file actually starts 4 bytes before the start of the signature match
|
||||||
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_NSIS)) &&
|
||||||
|
(type == CL_TYPE_MSEXE && fpt->offset > 4)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset - 4,
|
fpt->offset - 4,
|
||||||
|
@ -4127,7 +4089,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_AUTOIT:
|
case CL_TYPE_AUTOIT:
|
||||||
if (type == CL_TYPE_MSEXE) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_AUTOIT)) &&
|
||||||
|
(type == CL_TYPE_MSEXE)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
@ -4140,7 +4104,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_ISHIELD_MSI:
|
case CL_TYPE_ISHIELD_MSI:
|
||||||
if (type == CL_TYPE_MSEXE) {
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ISHIELD)) &&
|
||||||
|
(type == CL_TYPE_MSEXE)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
@ -4153,7 +4119,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_PDF:
|
case CL_TYPE_PDF:
|
||||||
if (type != CL_TYPE_PDF) {
|
if ((SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) &&
|
||||||
|
(type != CL_TYPE_PDF)) {
|
||||||
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
@ -4166,23 +4134,48 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_MSEXE:
|
case CL_TYPE_MSEXE:
|
||||||
if (type == CL_TYPE_MSEXE || type == CL_TYPE_ZIP || type == CL_TYPE_MSOLE2) {
|
if (SCAN_PARSE_PE && ctx->dconf->pe &&
|
||||||
|
(type == CL_TYPE_MSEXE || type == CL_TYPE_ZIP || type == CL_TYPE_MSOLE2)) {
|
||||||
cli_dbgmsg("*** Detected embedded PE file at %u ***\n", (unsigned int)fpt->offset);
|
struct cli_exe_info peinfo;
|
||||||
|
|
||||||
if ((uint64_t)(ctx->fmap->len - fpt->offset) > ctx->engine->maxembeddedpe) {
|
if ((uint64_t)(ctx->fmap->len - fpt->offset) > ctx->engine->maxembeddedpe) {
|
||||||
cli_dbgmsg("scanraw: MaxEmbeddedPE exceeded\n");
|
cli_dbgmsg("scanraw: MaxEmbeddedPE exceeded\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cli_exe_info_init(&peinfo, 0);
|
||||||
|
|
||||||
|
// Header validity check to prevent false positives from being scanned.
|
||||||
|
ret = cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL);
|
||||||
|
|
||||||
|
// peinfo memory may have been allocated and must be freed even if it failed.
|
||||||
|
cli_exe_info_destroy(&peinfo);
|
||||||
|
|
||||||
|
if (CL_SUCCESS != ret) {
|
||||||
|
cli_dbgmsg("Header check for MSEXE detection failed, probably not actually an embedded PE file.\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
cli_dbgmsg("*** Detected embedded PE file at %u ***\n", (unsigned int)fpt->offset);
|
||||||
|
|
||||||
|
// Setting ctx->corrupted_input will prevent the PE parser from reporting "broken executable" for unpacked/reconstructed files that may not be 100% to spec.
|
||||||
|
// In here we're just carrying the corrupted_input flag from parent to child, in case the parent's flag was set.
|
||||||
|
unsigned int corrupted_input = ctx->corrupted_input;
|
||||||
|
|
||||||
|
ctx->corrupted_input = 1;
|
||||||
|
|
||||||
nret = cli_magic_scan_nested_fmap_type(
|
nret = cli_magic_scan_nested_fmap_type(
|
||||||
ctx->fmap,
|
ctx->fmap,
|
||||||
fpt->offset,
|
fpt->offset,
|
||||||
|
// Sadly, there is no way from the PE header to determine the length of the PE file.
|
||||||
|
// So we just pass the remaining length of the fmap.
|
||||||
ctx->fmap->len - fpt->offset,
|
ctx->fmap->len - fpt->offset,
|
||||||
ctx,
|
ctx,
|
||||||
CL_TYPE_MSEXE,
|
CL_TYPE_MSEXE,
|
||||||
NULL,
|
NULL,
|
||||||
LAYER_ATTRIBUTES_EMBEDDED);
|
LAYER_ATTRIBUTES_EMBEDDED);
|
||||||
|
|
||||||
|
ctx->corrupted_input = corrupted_input;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -4789,6 +4782,8 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
|
||||||
* If self protection mechanism enabled, do the scanraw() scan first
|
* If self protection mechanism enabled, do the scanraw() scan first
|
||||||
* before extracting with a file type parser.
|
* before extracting with a file type parser.
|
||||||
*/
|
*/
|
||||||
|
cli_dbgmsg("cli_magic_scan: Performing raw scan to pattern match\n");
|
||||||
|
|
||||||
ret = scanraw(ctx, type, 0, &dettype);
|
ret = scanraw(ctx, type, 0, &dettype);
|
||||||
|
|
||||||
// Evaluate the result from the scan to see if it end the scan of this layer early,
|
// Evaluate the result from the scan to see if it end the scan of this layer early,
|
||||||
|
@ -5252,6 +5247,8 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
|
||||||
(type != CL_TYPE_HTML || !(SCAN_PARSE_HTML) || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) &&
|
(type != CL_TYPE_HTML || !(SCAN_PARSE_HTML) || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) &&
|
||||||
(!ctx->engine->sdb)) {
|
(!ctx->engine->sdb)) {
|
||||||
|
|
||||||
|
cli_dbgmsg("cli_magic_scan: Performing raw scan to pattern match and/or detect embedded files\n");
|
||||||
|
|
||||||
ret = scanraw(ctx, type, typercg, &dettype);
|
ret = scanraw(ctx, type, typercg, &dettype);
|
||||||
|
|
||||||
// Evaluate the result from the scan to see if it end the scan of this layer early,
|
// Evaluate the result from the scan to see if it end the scan of this layer early,
|
||||||
|
@ -5290,57 +5287,12 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
|
||||||
case CL_TYPE_MSEXE:
|
case CL_TYPE_MSEXE:
|
||||||
perf_nested_start(ctx, PERFT_PE, PERFT_SCAN);
|
perf_nested_start(ctx, PERFT_PE, PERFT_SCAN);
|
||||||
if (SCAN_PARSE_PE && ctx->dconf->pe) {
|
if (SCAN_PARSE_PE && ctx->dconf->pe) {
|
||||||
if (ctx->recursion_stack[ctx->recursion_level].attributes & LAYER_ATTRIBUTES_EMBEDDED) {
|
|
||||||
/*
|
|
||||||
* Embedded PE files are PE files that were found within another file using file-type scanning in scanraw()
|
|
||||||
* They are parsed differently than normal PE files.
|
|
||||||
*/
|
|
||||||
struct cli_exe_info peinfo;
|
|
||||||
|
|
||||||
cli_exe_info_init(&peinfo, 0);
|
|
||||||
|
|
||||||
// TODO We could probably substitute in a quicker
|
|
||||||
// method of determining whether a PE file exists
|
|
||||||
// at this offset.
|
|
||||||
if (cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL) != 0) {
|
|
||||||
cli_dbgmsg("Header check for MSEXE detection failed, probably not actually an embedded PE file.\n");
|
|
||||||
|
|
||||||
/* Despite failing, peinfo memory may have been allocated and must be freed. */
|
|
||||||
cli_exe_info_destroy(&peinfo);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
/* Immediately free up peinfo allocated memory, prior to any recursion */
|
|
||||||
cli_exe_info_destroy(&peinfo);
|
|
||||||
|
|
||||||
ret = cli_scanembpe(ctx, 0);
|
|
||||||
|
|
||||||
// TODO This method of embedded PE extraction
|
|
||||||
// is kinda gross in that:
|
|
||||||
// - if you have an executable that contains
|
|
||||||
// 20 other exes, the bytes associated with
|
|
||||||
// the last exe will have been included in
|
|
||||||
// hash computations and things 20 times
|
|
||||||
// (as overlay data to the previously
|
|
||||||
// extracted exes).
|
|
||||||
// - if you have a signed embedded exe, it
|
|
||||||
// will fail to validate after extraction
|
|
||||||
// bc it has overlay data, which is a
|
|
||||||
// violation of the Authenticode spec.
|
|
||||||
// - this method of extraction is subject to
|
|
||||||
// the recursion limit, which is fairly low.
|
|
||||||
//
|
|
||||||
// It'd be awesome if we could compute the PE
|
|
||||||
// size from the PE header and just extract
|
|
||||||
// that.
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Setting ctx->corrupted_input will prevent the PE parser from reporting "broken executable" for unpacked/reconstructed files that may not be 100% to spec.
|
// Setting ctx->corrupted_input will prevent the PE parser from reporting "broken executable" for unpacked/reconstructed files that may not be 100% to spec.
|
||||||
// In here we're just carrying the corrupted_input flag from parent to child, in case the parent's flag was set.
|
// In here we're just carrying the corrupted_input flag from parent to child, in case the parent's flag was set.
|
||||||
unsigned int corrupted_input = ctx->corrupted_input;
|
unsigned int corrupted_input = ctx->corrupted_input;
|
||||||
ret = cli_scanpe(ctx);
|
ret = cli_scanpe(ctx);
|
||||||
ctx->corrupted_input = corrupted_input;
|
ctx->corrupted_input = corrupted_input;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
perf_nested_stop(ctx, PERFT_PE, PERFT_SCAN);
|
perf_nested_stop(ctx, PERFT_PE, PERFT_SCAN);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -5364,8 +5316,9 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_TYPE_PDF: /* FIXMELIMITS: pdf should be an archive! */
|
case CL_TYPE_PDF: /* FIXMELIMITS: pdf should be an archive! */
|
||||||
if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF))
|
if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
|
||||||
ret = cli_scanpdf(ctx, 0);
|
ret = cli_scanpdf(ctx, 0);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -56,10 +56,6 @@
|
||||||
#define CHAR_BIT (8)
|
#define CHAR_BIT (8)
|
||||||
#endif
|
#endif
|
||||||
#define MAXMATCH 256
|
#define MAXMATCH 256
|
||||||
#ifndef FALSE
|
|
||||||
#define FALSE (0)
|
|
||||||
#define TRUE (1)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define CODE_BIT 16
|
#define CODE_BIT 16
|
||||||
#define NT (CODE_BIT + 3)
|
#define NT (CODE_BIT + 3)
|
||||||
|
@ -814,23 +810,25 @@ static cl_error_t arj_unstore(arj_metadata_t *metadata, int ofd, uint32_t len)
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int is_arj_archive(arj_metadata_t *metadata)
|
static bool is_arj_archive(arj_metadata_t *metadata)
|
||||||
{
|
{
|
||||||
const char header_id[2] = {0x60, 0xea};
|
const char header_id[2] = {0x60, 0xea};
|
||||||
const char *mark;
|
const char *mark;
|
||||||
|
|
||||||
mark = fmap_need_off_once(metadata->map, metadata->offset, 2);
|
mark = fmap_need_off_once(metadata->map, metadata->offset, 2);
|
||||||
if (!mark)
|
if (!mark) {
|
||||||
return FALSE;
|
cli_dbgmsg("is_arj_archive: Failed to read the two-byte ARJ header ID at offset %zu\n", metadata->offset);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
metadata->offset += 2;
|
metadata->offset += 2;
|
||||||
if (memcmp(&mark[0], &header_id[0], 2) == 0) {
|
if (memcmp(&mark[0], &header_id[0], 2) == 0) {
|
||||||
return TRUE;
|
return true;
|
||||||
}
|
}
|
||||||
cli_dbgmsg("Not an ARJ archive\n");
|
cli_dbgmsg("is_arj_archive: The two-byte ARJ header ID did not match; This is not an ARJ archive\n");
|
||||||
return FALSE;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int arj_read_main_header(arj_metadata_t *metadata)
|
static bool arj_read_main_header(arj_metadata_t *metadata)
|
||||||
{
|
{
|
||||||
uint16_t header_size, count;
|
uint16_t header_size, count;
|
||||||
arj_main_hdr_t main_hdr;
|
arj_main_hdr_t main_hdr;
|
||||||
|
@ -839,7 +837,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
struct text_norm_state fnstate, comstate;
|
struct text_norm_state fnstate, comstate;
|
||||||
unsigned char *fnnorm = NULL;
|
unsigned char *fnnorm = NULL;
|
||||||
unsigned char *comnorm = NULL;
|
unsigned char *comnorm = NULL;
|
||||||
uint32_t ret = TRUE;
|
bool ret = true;
|
||||||
|
|
||||||
size_t filename_max_len = 0;
|
size_t filename_max_len = 0;
|
||||||
size_t filename_len = 0;
|
size_t filename_len = 0;
|
||||||
|
@ -848,28 +846,28 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
size_t orig_offset = metadata->offset;
|
size_t orig_offset = metadata->offset;
|
||||||
|
|
||||||
if (fmap_readn(metadata->map, &header_size, metadata->offset, 2) != 2)
|
if (fmap_readn(metadata->map, &header_size, metadata->offset, 2) != 2)
|
||||||
return FALSE;
|
return false;
|
||||||
|
|
||||||
metadata->offset += 2;
|
metadata->offset += 2;
|
||||||
header_size = le16_to_host(header_size);
|
header_size = le16_to_host(header_size);
|
||||||
cli_dbgmsg("Header Size: %d\n", header_size);
|
cli_dbgmsg("Header Size: %d\n", header_size);
|
||||||
if (header_size == 0) {
|
if (header_size == 0) {
|
||||||
/* End of archive */
|
/* End of archive */
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (header_size > HEADERSIZE_MAX) {
|
if (header_size > HEADERSIZE_MAX) {
|
||||||
cli_dbgmsg("arj_read_header: invalid header_size: %u\n", header_size);
|
cli_dbgmsg("arj_read_header: invalid header_size: %u\n", header_size);
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if ((header_size + sizeof(header_size)) > (metadata->map->len - metadata->offset)) {
|
if ((header_size + sizeof(header_size)) > (metadata->map->len - metadata->offset)) {
|
||||||
cli_dbgmsg("arj_read_header: invalid header_size: %u, exceeds length of file.\n", header_size);
|
cli_dbgmsg("arj_read_header: invalid header_size: %u, exceeds length of file.\n", header_size);
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (fmap_readn(metadata->map, &main_hdr, metadata->offset, 30) != 30) {
|
if (fmap_readn(metadata->map, &main_hdr, metadata->offset, 30) != 30) {
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
metadata->offset += 30;
|
metadata->offset += 30;
|
||||||
|
@ -885,7 +883,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
|
|
||||||
if (main_hdr.first_hdr_size < 30) {
|
if (main_hdr.first_hdr_size < 30) {
|
||||||
cli_dbgmsg("Format error. First Header Size < 30\n");
|
cli_dbgmsg("Format error. First Header Size < 30\n");
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (main_hdr.first_hdr_size > 30) {
|
if (main_hdr.first_hdr_size > 30) {
|
||||||
|
@ -895,7 +893,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
filename_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
|
filename_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
|
||||||
if (filename_max_len > header_size) {
|
if (filename_max_len > header_size) {
|
||||||
cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
|
cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (filename_max_len > 0) {
|
if (filename_max_len > 0) {
|
||||||
|
@ -903,7 +901,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
filename = fmap_need_offstr(metadata->map, metadata->offset, filename_max_len + 1);
|
filename = fmap_need_offstr(metadata->map, metadata->offset, filename_max_len + 1);
|
||||||
if (!filename || !fnnorm) {
|
if (!filename || !fnnorm) {
|
||||||
cli_dbgmsg("UNARJ: Unable to allocate memory for filename\n");
|
cli_dbgmsg("UNARJ: Unable to allocate memory for filename\n");
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
filename_len = CLI_STRNLEN(filename, filename_max_len);
|
filename_len = CLI_STRNLEN(filename, filename_max_len);
|
||||||
|
@ -913,7 +911,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
comment_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
|
comment_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
|
||||||
if (comment_max_len > header_size) {
|
if (comment_max_len > header_size) {
|
||||||
cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
|
cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (comment_max_len > 0) {
|
if (comment_max_len > 0) {
|
||||||
|
@ -921,7 +919,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
comment = fmap_need_offstr(metadata->map, metadata->offset, comment_max_len + 1);
|
comment = fmap_need_offstr(metadata->map, metadata->offset, comment_max_len + 1);
|
||||||
if (!comment || !comnorm) {
|
if (!comment || !comnorm) {
|
||||||
cli_dbgmsg("UNARJ: Unable to allocate memory for comment\n");
|
cli_dbgmsg("UNARJ: Unable to allocate memory for comment\n");
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
comment_len = CLI_STRNLEN(comment, comment_max_len);
|
comment_len = CLI_STRNLEN(comment, comment_max_len);
|
||||||
|
@ -942,7 +940,7 @@ static int arj_read_main_header(arj_metadata_t *metadata)
|
||||||
for (;;) {
|
for (;;) {
|
||||||
const uint16_t *countp = fmap_need_off_once(metadata->map, metadata->offset, 2);
|
const uint16_t *countp = fmap_need_off_once(metadata->map, metadata->offset, 2);
|
||||||
if (!countp) {
|
if (!countp) {
|
||||||
ret = FALSE;
|
ret = false;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
count = cli_readint16(countp);
|
count = cli_readint16(countp);
|
||||||
|
@ -1118,7 +1116,7 @@ static cl_error_t arj_read_file_header(arj_metadata_t *metadata)
|
||||||
metadata->comp_size = file_hdr.comp_size;
|
metadata->comp_size = file_hdr.comp_size;
|
||||||
metadata->orig_size = file_hdr.orig_size;
|
metadata->orig_size = file_hdr.orig_size;
|
||||||
metadata->method = file_hdr.method;
|
metadata->method = file_hdr.method;
|
||||||
metadata->encrypted = ((file_hdr.flags & GARBLE_FLAG) != 0) ? TRUE : FALSE;
|
metadata->encrypted = ((file_hdr.flags & GARBLE_FLAG) != 0) ? true : false;
|
||||||
metadata->ofd = -1;
|
metadata->ofd = -1;
|
||||||
if (!metadata->filename) {
|
if (!metadata->filename) {
|
||||||
ret = CL_EMEM;
|
ret = CL_EMEM;
|
||||||
|
@ -1146,27 +1144,112 @@ cl_error_t cli_unarj_open(fmap_t *map, const char *dirname, arj_metadata_t *meta
|
||||||
metadata->map = map;
|
metadata->map = map;
|
||||||
metadata->offset = 0;
|
metadata->offset = 0;
|
||||||
if (!is_arj_archive(metadata)) {
|
if (!is_arj_archive(metadata)) {
|
||||||
cli_dbgmsg("Not in ARJ format\n");
|
cli_dbgmsg("cli_unarj_open: is_arj_archive check failed\n");
|
||||||
return CL_EFORMAT;
|
return CL_EFORMAT;
|
||||||
}
|
}
|
||||||
if (!arj_read_main_header(metadata)) {
|
if (!arj_read_main_header(metadata)) {
|
||||||
cli_dbgmsg("Failed to read main header\n");
|
cli_dbgmsg("cli_unarj_open: Failed to read main header\n");
|
||||||
return CL_EFORMAT;
|
return CL_EFORMAT;
|
||||||
}
|
}
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_error_t cli_unarj_prepare_file(const char *dirname, arj_metadata_t *metadata)
|
cl_error_t cli_unarj_header_check(
|
||||||
|
cli_ctx *ctx,
|
||||||
|
uint32_t offset,
|
||||||
|
size_t *size)
|
||||||
|
{
|
||||||
|
cl_error_t status = CL_EFORMAT;
|
||||||
|
bool bool_ret;
|
||||||
|
cl_error_t ret;
|
||||||
|
arj_metadata_t metadata = {0};
|
||||||
|
int files_found = 0;
|
||||||
|
|
||||||
|
cli_dbgmsg("in cli_unarj_header_check\n");
|
||||||
|
|
||||||
|
if (!ctx || !ctx->fmap || !size) {
|
||||||
|
status = CL_ENULLARG;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata.map = ctx->fmap;
|
||||||
|
metadata.offset = offset;
|
||||||
|
*size = 0;
|
||||||
|
|
||||||
|
bool_ret = is_arj_archive(&metadata);
|
||||||
|
if (false == bool_ret) {
|
||||||
|
cli_dbgmsg("Not in ARJ format\n");
|
||||||
|
status = CL_EFORMAT;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: is_arj_archive-check passed\n");
|
||||||
|
|
||||||
|
bool_ret = arj_read_main_header(&metadata);
|
||||||
|
if (false == bool_ret) {
|
||||||
|
cli_dbgmsg("Failed to read main header\n");
|
||||||
|
status = CL_EFORMAT;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: Successfully read main header\n");
|
||||||
|
|
||||||
|
do {
|
||||||
|
metadata.filename = NULL;
|
||||||
|
metadata.comp_size = 0;
|
||||||
|
metadata.orig_size = 0;
|
||||||
|
|
||||||
|
ret = cli_unarj_prepare_file(&metadata);
|
||||||
|
if (ret == CL_SUCCESS) {
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: Successfully read file header\n");
|
||||||
|
files_found++;
|
||||||
|
|
||||||
|
/* Skip the file data */
|
||||||
|
metadata.offset += metadata.comp_size;
|
||||||
|
|
||||||
|
} else if (ret == CL_BREAK) {
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: End of archive\n");
|
||||||
|
status = CL_BREAK;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: Error reading file header: %s\n", cl_strerror(ret));
|
||||||
|
status = ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
CLI_FREE_AND_SET_NULL(metadata.filename);
|
||||||
|
} while (ret == CL_SUCCESS);
|
||||||
|
|
||||||
|
if (files_found > 0) {
|
||||||
|
/* Successfully found at least one file */
|
||||||
|
status = CL_SUCCESS;
|
||||||
|
*size = metadata.offset - offset;
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: Successfully found %d files in valid ARJ archive of %zu bytes\n", files_found, *size);
|
||||||
|
} else {
|
||||||
|
status = CL_EFORMAT;
|
||||||
|
cli_dbgmsg("cli_unarj_header_check: No files found; Invalid ARJ archive\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
CLI_FREE_AND_SET_NULL(metadata.filename);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_error_t cli_unarj_prepare_file(arj_metadata_t *metadata)
|
||||||
{
|
{
|
||||||
cli_dbgmsg("in cli_unarj_prepare_file\n");
|
cli_dbgmsg("in cli_unarj_prepare_file\n");
|
||||||
if (!metadata || !dirname) {
|
|
||||||
|
if (NULL == metadata) {
|
||||||
|
cli_dbgmsg("cli_unarj_prepare_file: invalid NULL arguments\n");
|
||||||
return CL_ENULLARG;
|
return CL_ENULLARG;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Each file is preceded by the ARJ file marker */
|
/* Each file is preceded by the ARJ file marker */
|
||||||
if (!is_arj_archive(metadata)) {
|
if (!is_arj_archive(metadata)) {
|
||||||
cli_dbgmsg("Not in ARJ format\n");
|
cli_dbgmsg("Not in ARJ format\n");
|
||||||
return CL_EFORMAT;
|
return CL_EFORMAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
return arj_read_file_header(metadata);
|
return arj_read_file_header(metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,10 @@
|
||||||
#ifndef __UNARJ_H
|
#ifndef __UNARJ_H
|
||||||
#define __UNARJ_H
|
#define __UNARJ_H
|
||||||
|
|
||||||
|
#include "clamav.h"
|
||||||
|
#include "others.h"
|
||||||
#include "fmap.h"
|
#include "fmap.h"
|
||||||
|
|
||||||
typedef struct arj_metadata_tag {
|
typedef struct arj_metadata_tag {
|
||||||
char *filename;
|
char *filename;
|
||||||
uint32_t comp_size;
|
uint32_t comp_size;
|
||||||
|
@ -36,8 +39,20 @@ typedef struct arj_metadata_tag {
|
||||||
size_t offset;
|
size_t offset;
|
||||||
} arj_metadata_t;
|
} arj_metadata_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Verify ARJ file header and get size of ARJ based on headers.
|
||||||
|
*
|
||||||
|
* Does not extract or scan the file.
|
||||||
|
*
|
||||||
|
* @param[in,out] ctx Scan context
|
||||||
|
* @param offset Offset of the file header
|
||||||
|
* @param[out] size Will be set to the size of the file header + file data.
|
||||||
|
* @return cl_error_t CL_SUCCESS on success, or an error code on failure.
|
||||||
|
*/
|
||||||
|
cl_error_t cli_unarj_header_check(cli_ctx *ctx, uint32_t offset, size_t *size);
|
||||||
|
|
||||||
cl_error_t cli_unarj_open(fmap_t *map, const char *dirname, arj_metadata_t *metadata);
|
cl_error_t cli_unarj_open(fmap_t *map, const char *dirname, arj_metadata_t *metadata);
|
||||||
cl_error_t cli_unarj_prepare_file(const char *dirname, arj_metadata_t *metadata);
|
cl_error_t cli_unarj_prepare_file(arj_metadata_t *metadata);
|
||||||
cl_error_t cli_unarj_extract_file(const char *dirname, arj_metadata_t *metadata);
|
cl_error_t cli_unarj_extract_file(const char *dirname, arj_metadata_t *metadata);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -825,6 +825,48 @@ done:
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cl_error_t cli_unzip_single_header_check(
|
||||||
|
cli_ctx *ctx,
|
||||||
|
uint32_t offset,
|
||||||
|
size_t *size)
|
||||||
|
{
|
||||||
|
cl_error_t status = CL_ERROR;
|
||||||
|
struct zip_record file_record = {0};
|
||||||
|
cl_error_t ret;
|
||||||
|
|
||||||
|
ret = parse_local_file_header(
|
||||||
|
ctx,
|
||||||
|
offset,
|
||||||
|
NULL, /* num_files_unzipped */
|
||||||
|
0, /* file_count */
|
||||||
|
NULL, /* central_header */
|
||||||
|
NULL, /* tmpd */
|
||||||
|
false, /* detect_encrypted */
|
||||||
|
NULL, /* zcb */
|
||||||
|
&file_record,
|
||||||
|
size);
|
||||||
|
if (ret != CL_SUCCESS) {
|
||||||
|
cli_dbgmsg("cli_unzip: single header check - failed to parse local file header: %s (%d)\n", cl_strerror(ret), ret);
|
||||||
|
status = ret;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (file_record.compressed_size == 0 || file_record.uncompressed_size == 0) {
|
||||||
|
cli_dbgmsg("cli_unzip: single header check - empty file\n");
|
||||||
|
status = CL_EFORMAT;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
status = CL_SUCCESS;
|
||||||
|
|
||||||
|
done:
|
||||||
|
if (file_record.original_filename) {
|
||||||
|
free(file_record.original_filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Parse, extract, and scan a file by iterating the central directory.
|
* @brief Parse, extract, and scan a file by iterating the central directory.
|
||||||
*
|
*
|
||||||
|
|
|
@ -119,6 +119,18 @@ cl_error_t cli_unzip(cli_ctx *ctx);
|
||||||
*/
|
*/
|
||||||
cl_error_t cli_unzip_single(cli_ctx *ctx, size_t local_header_offset);
|
cl_error_t cli_unzip_single(cli_ctx *ctx, size_t local_header_offset);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Verify a single local file header.
|
||||||
|
*
|
||||||
|
* Does not extract or scan the file.
|
||||||
|
*
|
||||||
|
* @param[in,out] ctx Scan context
|
||||||
|
* @param offset Offset of the local file header
|
||||||
|
* @param[out] size Will be set to the size of the file header + file data.
|
||||||
|
* @return cl_error_t CL_SUCCESS on success, or an error code on failure.
|
||||||
|
*/
|
||||||
|
cl_error_t cli_unzip_single_header_check(cli_ctx *ctx, uint32_t offset, size_t *size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Unzip a single file from a zip archive.
|
* @brief Unzip a single file from a zip archive.
|
||||||
*
|
*
|
||||||
|
|
130
unit_tests/clamscan/embedded_files_test.py
Normal file
130
unit_tests/clamscan/embedded_files_test.py
Normal file
|
@ -0,0 +1,130 @@
|
||||||
|
# Copyright (C) 2020-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
||||||
|
|
||||||
|
"""
|
||||||
|
Run clamscan tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from zipfile import ZIP_DEFLATED, ZipFile
|
||||||
|
|
||||||
|
sys.path.append('../unit_tests')
|
||||||
|
import testcase
|
||||||
|
|
||||||
|
|
||||||
|
class TC(testcase.TestCase):
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
super(TC, cls).setUpClass()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def tearDownClass(cls):
|
||||||
|
super(TC, cls).tearDownClass()
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
super(TC, self).setUp()
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
super(TC, self).tearDown()
|
||||||
|
self.verify_valgrind_log()
|
||||||
|
|
||||||
|
def test_embedded_zips(self):
|
||||||
|
self.step_name('Test that clamav can successfully extract and alert on multiple embedded ZIP files')
|
||||||
|
|
||||||
|
path_db = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'signatures'
|
||||||
|
testfiles = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'test.png.emb-zips'
|
||||||
|
|
||||||
|
command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles} --gen-json --debug --allmatch'.format(
|
||||||
|
valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
|
||||||
|
path_db=path_db,
|
||||||
|
testfiles=testfiles,
|
||||||
|
)
|
||||||
|
output = self.execute_command(command)
|
||||||
|
|
||||||
|
assert output.ec == 1 # no virus, no failures
|
||||||
|
|
||||||
|
expected_stdout = [
|
||||||
|
'test.png.emb-zips: test-file-1-1.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-zips: test-file-1-2.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-zips: test-file-2-1.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-zips: test-file-2-2.UNOFFICIAL FOUND',
|
||||||
|
]
|
||||||
|
unexpected_stdout = [
|
||||||
|
'OK',
|
||||||
|
]
|
||||||
|
self.verify_output(output.out, expected=expected_stdout, unexpected=unexpected_stdout)
|
||||||
|
|
||||||
|
def test_embedded_arjs(self):
|
||||||
|
self.step_name('Test that clamav can successfully extract and alert on multiple embedded ARJ files')
|
||||||
|
|
||||||
|
path_db = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'signatures'
|
||||||
|
testfiles = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'test.png.emb-arjs'
|
||||||
|
|
||||||
|
command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles} --gen-json --debug --allmatch'.format(
|
||||||
|
valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
|
||||||
|
path_db=path_db,
|
||||||
|
testfiles=testfiles,
|
||||||
|
)
|
||||||
|
output = self.execute_command(command)
|
||||||
|
|
||||||
|
assert output.ec == 1 # no virus, no failures
|
||||||
|
|
||||||
|
expected_stdout = [
|
||||||
|
'test.png.emb-arjs: test-file-1-1.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-arjs: test-file-1-2.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-arjs: test-file-2-1.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-arjs: test-file-2-2.UNOFFICIAL FOUND',
|
||||||
|
]
|
||||||
|
unexpected_stdout = [
|
||||||
|
'OK',
|
||||||
|
]
|
||||||
|
self.verify_output(output.out, expected=expected_stdout, unexpected=unexpected_stdout)
|
||||||
|
|
||||||
|
def test_embedded_cabs(self):
|
||||||
|
self.step_name('Test that clamav can successfully extract and alert on multiple embedded CAB files')
|
||||||
|
|
||||||
|
path_db = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'signatures'
|
||||||
|
testfiles = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'test.png.emb-cabs'
|
||||||
|
|
||||||
|
command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles} --gen-json --debug --allmatch'.format(
|
||||||
|
valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
|
||||||
|
path_db=path_db,
|
||||||
|
testfiles=testfiles,
|
||||||
|
)
|
||||||
|
output = self.execute_command(command)
|
||||||
|
|
||||||
|
assert output.ec == 1 # no virus, no failures
|
||||||
|
|
||||||
|
expected_stdout = [
|
||||||
|
'test.png.emb-cabs: test-file-1-1.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-cabs: test-file-1-2.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-cabs: test-file-2-1.UNOFFICIAL FOUND',
|
||||||
|
'test.png.emb-cabs: test-file-2-2.UNOFFICIAL FOUND',
|
||||||
|
]
|
||||||
|
unexpected_stdout = [
|
||||||
|
'OK',
|
||||||
|
]
|
||||||
|
self.verify_output(output.out, expected=expected_stdout, unexpected=unexpected_stdout)
|
||||||
|
|
||||||
|
def test_embedded_exes(self):
|
||||||
|
self.step_name('Test that clamav can successfully extract and alert on multiple embedded EXE files')
|
||||||
|
|
||||||
|
path_db = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'signatures'
|
||||||
|
testfiles = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles' / 'clam.exe.emb-exes'
|
||||||
|
|
||||||
|
command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles} --gen-json --debug --allmatch'.format(
|
||||||
|
valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
|
||||||
|
path_db=path_db,
|
||||||
|
testfiles=testfiles,
|
||||||
|
)
|
||||||
|
output = self.execute_command(command)
|
||||||
|
|
||||||
|
assert output.ec == 1 # no virus, no failures
|
||||||
|
|
||||||
|
expected_stdout = [
|
||||||
|
'clam.exe.emb-exes: Win.Test.LilEXE.UNOFFICIAL FOUND',
|
||||||
|
'clam.exe.emb-exes: Win.Test.SmolEXE.UNOFFICIAL FOUND',
|
||||||
|
]
|
||||||
|
unexpected_stdout = [
|
||||||
|
'OK',
|
||||||
|
]
|
||||||
|
self.verify_output(output.out, expected=expected_stdout, unexpected=unexpected_stdout)
|
BIN
unit_tests/input/embedded_testfiles/clam.exe.emb-exes
Normal file
BIN
unit_tests/input/embedded_testfiles/clam.exe.emb-exes
Normal file
Binary file not shown.
BIN
unit_tests/input/embedded_testfiles/emb/1/test-file-2.ref
Executable file
BIN
unit_tests/input/embedded_testfiles/emb/1/test-file-2.ref
Executable file
Binary file not shown.
BIN
unit_tests/input/embedded_testfiles/emb/1/test-file.ref
Executable file
BIN
unit_tests/input/embedded_testfiles/emb/1/test-file.ref
Executable file
Binary file not shown.
BIN
unit_tests/input/embedded_testfiles/emb/2/test-file-2.ref
Executable file
BIN
unit_tests/input/embedded_testfiles/emb/2/test-file-2.ref
Executable file
Binary file not shown.
BIN
unit_tests/input/embedded_testfiles/emb/2/test-file.ref
Executable file
BIN
unit_tests/input/embedded_testfiles/emb/2/test-file.ref
Executable file
Binary file not shown.
7
unit_tests/input/embedded_testfiles/emb/smol_exe/Cargo.lock
generated
Executable file
7
unit_tests/input/embedded_testfiles/emb/smol_exe/Cargo.lock
generated
Executable file
|
@ -0,0 +1,7 @@
|
||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 4
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "t1"
|
||||||
|
version = "0.1.0"
|
13
unit_tests/input/embedded_testfiles/emb/smol_exe/Cargo.toml
Executable file
13
unit_tests/input/embedded_testfiles/emb/smol_exe/Cargo.toml
Executable file
|
@ -0,0 +1,13 @@
|
||||||
|
[package]
|
||||||
|
name = "t1"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
strip = true
|
||||||
|
opt-level = "z"
|
||||||
|
lto = true
|
||||||
|
codegen-units = 1
|
||||||
|
panic = "abort"
|
37
unit_tests/input/embedded_testfiles/emb/smol_exe/src/main.rs
Executable file
37
unit_tests/input/embedded_testfiles/emb/smol_exe/src/main.rs
Executable file
|
@ -0,0 +1,37 @@
|
||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
|
||||||
|
use core::arch::asm;
|
||||||
|
|
||||||
|
///
|
||||||
|
/// This is basically a Windows port of the example from: <https://darkcoding.net/software/a-very-small-rust-binary-indeed/>
|
||||||
|
/// With one minor change to print a message instead of only exiting with a code.
|
||||||
|
/// Thank you to the author.
|
||||||
|
///
|
||||||
|
/// Build with:
|
||||||
|
/// ```powershell
|
||||||
|
/// $env:RUSTFLAGS="-Ctarget-cpu=native -Clink-args=/ENTRY:_start -Clink-args=/SUBSYSTEM:CONSOLE -Clink-args=/LARGEADDRESSAWARE:NO -Clink-args=ucrt.lib -Crelocation-model=static -Clink-args=-Wl,-n,-N,--no-dynamic-linker,--no-pie,--build-id=none,--no-eh-frame-hdr"
|
||||||
|
/// cargo +nightly build -Z build-std=std,panic_abort -Z build-std-features="optimize_for_size" --target x86_64-pc-windows-msvc --release
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
|
||||||
|
#[unsafe(no_mangle)]
|
||||||
|
pub extern "C" fn _start() -> ! {
|
||||||
|
let s = b"Lil EXE\n\0";
|
||||||
|
unsafe {
|
||||||
|
asm!(
|
||||||
|
"mov rcx, {0}",
|
||||||
|
"call puts",
|
||||||
|
in(reg) s.as_ptr(),
|
||||||
|
options(nostack, noreturn)
|
||||||
|
)
|
||||||
|
// nostack prevents `asm!` from push/pop rax
|
||||||
|
// noreturn prevents it putting a 'ret' at the end
|
||||||
|
// but it does put a ud2 (undefined instruction) instead
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[panic_handler]
|
||||||
|
fn my_panic(_info: &core::panic::PanicInfo) -> ! {
|
||||||
|
loop {}
|
||||||
|
}
|
1
unit_tests/input/embedded_testfiles/signatures/1.1.hsb
Normal file
1
unit_tests/input/embedded_testfiles/signatures/1.1.hsb
Normal file
|
@ -0,0 +1 @@
|
||||||
|
579de681add9f8c686fa791c49d1222a63c236febff37769b5fb50659b007491:16:test-file-1-1
|
1
unit_tests/input/embedded_testfiles/signatures/1.2.hsb
Normal file
1
unit_tests/input/embedded_testfiles/signatures/1.2.hsb
Normal file
|
@ -0,0 +1 @@
|
||||||
|
b56f04ceb6cadbc0f50f9acfadbedc81257a6af21f6212ef57a70a599fc8bf38:16:test-file-1-2
|
1
unit_tests/input/embedded_testfiles/signatures/2.1.hsb
Normal file
1
unit_tests/input/embedded_testfiles/signatures/2.1.hsb
Normal file
|
@ -0,0 +1 @@
|
||||||
|
1cce5c6d7f11469ffa6153481b7d6275534ce7c62bc34f12f7d742c5e6cf026b:24:test-file-2-1
|
1
unit_tests/input/embedded_testfiles/signatures/2.2.hsb
Normal file
1
unit_tests/input/embedded_testfiles/signatures/2.2.hsb
Normal file
|
@ -0,0 +1 @@
|
||||||
|
6d1c6cae0a30435b52d362544bea666492d06173ded04504bf30f369abfadd50:27:test-file-2-2
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Match on "Lil EXE\n"
|
||||||
|
Win.Test.LilEXE;Engine:90-255,Target:1;0;1552:4c696c204558450a
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Match on "Smol EXE\n"
|
||||||
|
Win.Test.SmolEXE;Engine:90-255,Target:1;0;1552:536d6f6c204558450a
|
BIN
unit_tests/input/embedded_testfiles/test.png.emb-arjs
Normal file
BIN
unit_tests/input/embedded_testfiles/test.png.emb-arjs
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
BIN
unit_tests/input/embedded_testfiles/test.png.emb-cabs
Normal file
BIN
unit_tests/input/embedded_testfiles/test.png.emb-cabs
Normal file
Binary file not shown.
After Width: | Height: | Size: 25 KiB |
BIN
unit_tests/input/embedded_testfiles/test.png.emb-zips
Normal file
BIN
unit_tests/input/embedded_testfiles/test.png.emb-zips
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
Loading…
Add table
Add a link
Reference in a new issue