Merge pull request #1606 from val-ms/1.5.1-prep

1.5.1 patch version changes
2025-10-19 10:23:17 +00:00 · 2025-10-15 16:29:32 -04:00 · 2025-10-15 16:29:32 -04:00 · 0a6802e88a
commit 0a6802e88a
parent 83fd7f14fb 2412d40bb1
14 changed files with 841 additions and 594 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -22,7 +22,7 @@ string(TIMESTAMP TODAY "%Y%m%d")
 set(VERSION_SUFFIX "")

 project( ClamAV
-         VERSION "1.5.0"
+         VERSION "1.5.1"
         DESCRIPTION "ClamAV open source email, web, and end-point anti-virus toolkit." )

 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/2
+++ b/2
@ -10,7 +10,7 @@ properties(
        parameters(
            [
                string(name: 'VERSION',
-                       defaultValue: '1.5.0',
+                       defaultValue: '1.5.1',
                       description: 'ClamAV version string'),
                string(name: 'FRAMEWORK_BRANCH',
                       defaultValue: '1.5',
--- a/NEWS.md
+++ b/NEWS.md
@ -3,6 +3,35 @@
 Note: This file refers to the official packages. Things described here may
 differ slightly from third-party binary packages.

+## 1.5.1
+
+ClamAV 1.5.1 is a patch release with the following fixes:
+
+- Fixed a significant performance issue when scanning some PE files.
+
+- Fixed an issue recording file entries from a ZIP archive central directory
+  which resulted in "Heuristics.Limits.Exceeded.MaxFiles" alerts when using the
+  ClamScan `--alert-exceeds-max` command line option or ClamD `AlertExceedsMax`
+  config file option.
+
+- Improved performance when scanning TNEF email attachments.
+
+- Fixed an issue with recording metadata for OOXML office documents.
+
+- Fixed an issue with signature matches for VBA in OLE2 office documents.
+
+- Loosened overly restrictive rules for embedded file identification and
+  increased the limit for finding PE files embedded in other PE files.
+
+- Fixed an issue with extracting some RAR archives embedded in other files.
+
+- Fixed an issue with calculating fuzzy hashes affecting some images by updating
+  the version for several Rust library dependencies.
+  - This release does not require a newer version of the Rust compiler toolchain
+    than what was required for ClamAV 1.5.0.
+
+[GitHub pull request](https://github.com/Cisco-Talos/clamav/pull/1606)
+
 ## 1.5.0

 ClamAV 1.5.0 includes the following improvements and changes:
--- a/libclamav/bytecode_api.h
+++ b/libclamav/bytecode_api.h
@ -175,6 +175,9 @@ enum FunctionalityLevels {
    FUNC_LEVEL_1_0_4 = 164, /**< LibClamAV release 1.0.4 */
    FUNC_LEVEL_1_0_5 = 165, /**< LibClamAV release 1.0.5 */
    FUNC_LEVEL_1_0_6 = 166, /**< LibClamAV release 1.0.6 */
+    FUNC_LEVEL_1_0_7 = 167, /**< LibClamAV release 1.0.7 */
+    FUNC_LEVEL_1_0_8 = 168, /**< LibClamAV release 1.0.8 */
+    FUNC_LEVEL_1_0_9 = 169, /**< LibClamAV release 1.0.9 */

    FUNC_LEVEL_1_1   = 180, /**< LibClamAV release 1.1.0 */
    FUNC_LEVEL_1_1_1 = 181, /**< LibClamAV release 1.1.1 */
@ -188,10 +191,15 @@ enum FunctionalityLevels {

    FUNC_LEVEL_1_3   = 200, /**< LibClamAV release 1.3.0 */
    FUNC_LEVEL_1_3_1 = 201, /**< LibClamAV release 1.3.1 */
+    FUNC_LEVEL_1_3_2 = 202, /**< LibClamAV release 1.3.2 */

-    FUNC_LEVEL_1_4 = 210, /**< LibClamAV release 1.4.0 */
+    FUNC_LEVEL_1_4   = 210, /**< LibClamAV release 1.4.0 */
+    FUNC_LEVEL_1_4_1 = 211, /**< LibClamAV release 1.4.1 */
+    FUNC_LEVEL_1_4_2 = 212, /**< LibClamAV release 1.4.2 */
+    FUNC_LEVEL_1_4_3 = 213, /**< LibClamAV release 1.4.3 */

-    FUNC_LEVEL_1_5 = 220, /**< LibClamAV release 1.5.0 */
+    FUNC_LEVEL_1_5   = 230, /**< LibClamAV release 1.5.0 */
+    FUNC_LEVEL_1_5_1 = 231, /**< LibClamAV release 1.5.1 */
 };

 /**
--- a/libclamav/matcher-ac.c
+++ b/libclamav/matcher-ac.c
@ -833,8 +833,9 @@ int cli_ac_chklsig(const char *expr, const char *end, uint32_t *lsigcnt, unsigne
                    return -1;
                }

-                for (i += 2; i + 1 < len && (isdigit(expr[i + 1]) || expr[i + 1] == ','); i++)
-                    ;
+                for (i += 2; i + 1 < len && (isdigit(expr[i + 1]) || expr[i + 1] == ','); i++) {
+                    continue;
+                }
            }

            if (&expr[i + 1] == rend)
@ -1625,19 +1626,23 @@ void cli_ac_freedata(struct cli_ac_data *data)
    }
 }

-/* returns only CL_SUCCESS or CL_EMEM */
-inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx)
+/**
+ * @brief Add a match for an object type to the list of matched types.
+ *
+ * Important: The caller is responsible for checking limits!
+ *
+ * @param list Pointer to the list of matched types. *list may be NULL if no types have been added yet.
+ * @param type The type of the embedded object.
+ * @param offset The offset of the embedded object.
+ * @param ctx The context information. May be NULL.
+ * @return cl_error_t CL_SUCCESS regardless if added, or CL_EMEM if memory allocation failed.
+ */
+inline static cl_error_t ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset, const cli_ctx *ctx)
 {
-    struct cli_matched_type *tnode, *tnode_last;
+    struct cli_matched_type *tnode;

-    if (type == CL_TYPE_ZIPSFX) {
-        if (*list && ctx && ctx->engine->maxfiles && (*list)->cnt > ctx->engine->maxfiles)
-            return CL_SUCCESS;
-    } else if (*list && (*list)->cnt >= MAX_EMBEDDED_OBJ) {
-        return CL_SUCCESS;
-    }
-
-    if (!(tnode = calloc(1, sizeof(struct cli_matched_type)))) {
+    tnode = calloc(1, sizeof(struct cli_matched_type));
+    if (NULL == tnode) {
        cli_errmsg("cli_ac_addtype: Can't allocate memory for new type node\n");
        return CL_EMEM;
    }
@ -1645,16 +1650,25 @@ inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, of
    tnode->type   = type;
    tnode->offset = offset;

-    tnode_last = *list;
-    while (tnode_last && tnode_last->next)
-        tnode_last = tnode_last->next;
+    if (*list) {
+        // Add to end of existing list.
+        struct cli_matched_type *tnode_last = *list;

-    if (tnode_last)
+        while (tnode_last && tnode_last->next) {
+            tnode_last = tnode_last->next;
+        }
        tnode_last->next = tnode;
-    else
+    } else {
+        // First type in the list.
        *list = tnode;
+    }

    (*list)->cnt++;
+
+    if (UNLIKELY(cli_get_debug_flag())) {
+        cli_dbgmsg("ac_addtype: added %s embedded object at offset " STDi64 ". Embedded object count: %d\n", cli_ftname(type), (uint64_t)offset, (*list)->cnt);
+    }
+
    return CL_SUCCESS;
 }

@ -1999,14 +2013,65 @@ cl_error_t cli_ac_scanbuff(

                                        cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
                                        type = pt->type;
-                                        if ((ftoffset != NULL) &&
-                                            ((*ftoffset == NULL) || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ || type == CL_TYPE_ZIPSFX) && (type >= CL_TYPE_SFX || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2) && type == CL_TYPE_MSEXE))) {
-                                            /* FIXME: the first offset in the array is most likely the correct one but
-                                             * it may happen it is not
-                                             */
-                                            for (j = 1; j <= CLI_DEFAULT_AC_TRACKLEN + 1 && offmatrix[0][j] != (uint32_t)-1; j++)
-                                                if (ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j], ctx))
-                                                    return CL_EMEM;
+
+                                        if (ftoffset != NULL) {
+                                            // Caller provided a pointer to record matched types.
+                                            bool too_many_types = false;
+                                            bool supported_type = false;
+
+                                            if (*ftoffset != NULL) {
+                                                // Have some type matches already. Check limits.
+
+                                                if (ctx && ((type == CL_TYPE_ZIPSFX) ||
+                                                            (type == CL_TYPE_MSEXE && ftype == CL_TYPE_MSEXE))) {
+                                                    // When ctx present, limit the number of type matches using ctx->engine->maxfiles for specific types.
+                                                    // Reasoning:
+                                                    //   ZIP local file header entries likely to be numerous if a single ZIP appended to the scanned file.
+                                                    //   MSEXE can contain many embedded MSEXE entries and MSEXE type false positives matches.
+
+                                                    if (ctx->engine->maxfiles == 0) {
+                                                        // Max-files limit is disabled.
+                                                    } else if ((*ftoffset)->cnt >= ctx->engine->maxfiles) {
+                                                        if (UNLIKELY(cli_get_debug_flag())) {
+                                                            cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached maxfiles limit of %u\n", cli_ftname(type), (*ftoffset)->offset, ctx->engine->maxfiles);
+                                                        }
+                                                        too_many_types = true;
+                                                    }
+                                                } else {
+                                                    // Limit the number of type matches using MAX_EMBEDDED_OBJ.
+                                                    if ((*ftoffset)->cnt >= MAX_EMBEDDED_OBJ) {
+                                                        if (UNLIKELY(cli_get_debug_flag())) {
+                                                            cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached MAX_EMBEDDED_OBJ limit of %u\n", cli_ftname(type), (*ftoffset)->offset, MAX_EMBEDDED_OBJ);
+                                                        }
+                                                        too_many_types = true;
+                                                    }
+                                                }
+                                            }
+
+                                            // Filter to supported types.
+                                            if (
+                                                // Found type is MBR.
+                                                type == CL_TYPE_MBR ||
+                                                // Found type is any SFX type (i.e., ZIPSFX, RARSFX, 7ZSSFX, etc.).
+                                                type >= CL_TYPE_SFX ||
+                                                // Found type is an MSEXE, but only if host file type is one of MSEXE, ZIP, or MSOLE2.
+                                                (type == CL_TYPE_MSEXE && (ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2))) {
+
+                                                supported_type = true;
+                                            }
+
+                                            if (supported_type && !too_many_types) {
+                                                /* FIXME: the first offset in the array is most likely the correct one but
+                                                 * it may happen it is not
+                                                 * Until we're certain and can fix this, we add all offsets in the list.
+                                                 */
+                                                for (j = 1; j <= CLI_DEFAULT_AC_TRACKLEN + 1 && offmatrix[0][j] != (uint32_t)-1; j++) {
+                                                    ret = ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j], ctx);
+                                                    if (CL_SUCCESS != ret) {
+                                                        return ret;
+                                                    }
+                                                }
+                                            }
                                        }

                                        memset(offmatrix[0], (uint32_t)-1, pt->parts * (CLI_DEFAULT_AC_TRACKLEN + 2) * sizeof(uint32_t));
@ -2066,11 +2131,59 @@ cl_error_t cli_ac_scanbuff(

                                    cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
                                    type = pt->type;
-                                    if ((ftoffset != NULL) &&
-                                        ((*ftoffset == NULL) || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ || type == CL_TYPE_ZIPSFX) && (type == CL_TYPE_MBR || type >= CL_TYPE_SFX || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2) && type == CL_TYPE_MSEXE))) {

-                                        if (ac_addtype(ftoffset, type, realoff, ctx))
-                                            return CL_EMEM;
+                                    if (ftoffset != NULL) {
+                                        // Caller provided a pointer to record matched types.
+                                        bool too_many_types = false;
+                                        bool supported_type = false;
+
+                                        if (*ftoffset != NULL) {
+                                            // Have some type matches already. Check limits.
+
+                                            if (ctx && ((type == CL_TYPE_ZIPSFX) ||
+                                                        (type == CL_TYPE_MSEXE && ftype == CL_TYPE_MSEXE))) {
+                                                // When ctx present, limit the number of type matches using ctx->engine->maxfiles for specific types.
+                                                // Reasoning:
+                                                //   ZIP local file header entries likely to be numerous if a single ZIP appended to the scanned file.
+                                                //   MSEXE can contain many embedded MSEXE entries and MSEXE type false positives matches.
+
+                                                if (ctx->engine->maxfiles == 0) {
+                                                    // Max-files limit is disabled.
+                                                } else if ((*ftoffset)->cnt >= ctx->engine->maxfiles) {
+                                                    if (UNLIKELY(cli_get_debug_flag())) {
+                                                        cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached maxfiles limit of %u\n", cli_ftname(type), (*ftoffset)->offset, ctx->engine->maxfiles);
+                                                    }
+                                                    too_many_types = true;
+                                                }
+                                            } else {
+                                                // Limit the number of type matches using MAX_EMBEDDED_OBJ.
+                                                if ((*ftoffset)->cnt >= MAX_EMBEDDED_OBJ) {
+                                                    if (UNLIKELY(cli_get_debug_flag())) {
+                                                        cli_dbgmsg("ac_addtype: Can't add %s type at offset " STDu64 " to list of embedded type matches. Reached MAX_EMBEDDED_OBJ limit of %u\n", cli_ftname(type), (*ftoffset)->offset, MAX_EMBEDDED_OBJ);
+                                                    }
+                                                    too_many_types = true;
+                                                }
+                                            }
+                                        }
+
+                                        // Filter to supported types.
+                                        if (
+                                            // Found type is MBR.
+                                            type == CL_TYPE_MBR ||
+                                            // Found type is any SFX type (i.e., ZIPSFX, RARSFX, 7ZSSFX, etc.).
+                                            type >= CL_TYPE_SFX ||
+                                            // Found type is an MSEXE, but only if host file type is one of MSEXE, ZIP, or MSOLE2.
+                                            (type == CL_TYPE_MSEXE && (ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP || ftype == CL_TYPE_MSOLE2))) {
+
+                                            supported_type = true;
+                                        }
+
+                                        if (supported_type && !too_many_types) {
+                                            ret = ac_addtype(ftoffset, type, realoff, ctx);
+                                            if (CL_SUCCESS != ret) {
+                                                return ret;
+                                            }
+                                        }
                                    }
                                }
                            } else {
--- a/libclamav/matcher.h
+++ b/libclamav/matcher.h
@ -390,6 +390,6 @@ cl_error_t cli_matchmeta(cli_ctx *ctx, const char *fname, size_t fsizec, size_t
 *               - 9 - MachO
 * @param ctx The current scan context
 */
-void cli_targetinfo(struct cli_target_info *info, unsigned int target, cli_ctx *ctx);
+void cli_targetinfo(struct cli_target_info *info, cli_target_t target, cli_ctx *ctx);

 #endif
--- a/libclamav/others.h
+++ b/libclamav/others.h
@ -71,7 +71,7 @@
 * in re-enabling affected modules.
 */

-#define CL_FLEVEL 230
+#define CL_FLEVEL 231
 #define CL_FLEVEL_DCONF CL_FLEVEL
 #define CL_FLEVEL_SIGTOOL CL_FLEVEL

--- a/libclamav/pe.c
+++ b/libclamav/pe.c
@ -2781,7 +2781,7 @@ int cli_scanpe(cli_ctx *ctx)

    cli_exe_info_init(peinfo, 0);

-    peheader_ret = cli_peheader(map, peinfo, opts, ctx);
+    peheader_ret = cli_peheader(ctx, peinfo, opts);

    // Warn the user if PE header parsing failed - if it's a binary that runs
    // successfully on Windows, we need to relax our PE parsing standards so
@ -4360,7 +4360,7 @@ int cli_scanpe(cli_ctx *ctx)

 cl_error_t cli_pe_targetinfo(cli_ctx *ctx, struct cli_exe_info *peinfo)
 {
-    return cli_peheader(ctx->fmap, peinfo, CLI_PEHEADER_OPT_EXTRACT_VINFO, NULL);
+    return cli_peheader(ctx, peinfo, CLI_PEHEADER_OPT_EXTRACT_VINFO);
 }

 /** Parse the PE header and, if successful, populate peinfo
@ -4420,7 +4420,7 @@ cl_error_t cli_pe_targetinfo(cli_ctx *ctx, struct cli_exe_info *peinfo)
 *
 * TODO Same as above but with JSON creation
 */
-cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts, cli_ctx *ctx)
+cl_error_t cli_peheader(cli_ctx *ctx, struct cli_exe_info *peinfo, uint32_t opts)
 {
    cl_error_t ret = CL_ERROR;

@ -4446,17 +4446,20 @@ cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts,
    size_t read;
    uint32_t temp;

+    fmap_t *map = NULL;
+
    int toval                   = 0;
    struct json_object *pe_json = NULL;
    char jsonbuf[128];

-    if (ctx == NULL &&
-        (opts & CLI_PEHEADER_OPT_COLLECT_JSON ||
-         opts & CLI_PEHEADER_OPT_DBG_PRINT_INFO)) {
-        cli_errmsg("cli_peheader: ctx can't be NULL for options specified\n");
+    if (ctx == NULL) {
+        cli_errmsg("cli_peheader: ctx can't be NULL\n");
+        ret = CL_EARG;
        goto done;
    }

+    map = ctx->fmap;
+
    if (opts & CLI_PEHEADER_OPT_COLLECT_JSON) {
        pe_json = get_pe_property(ctx);
    }
@ -5469,7 +5472,7 @@ cl_error_t cli_check_auth_header(cli_ctx *ctx, struct cli_exe_info *peinfo)
        peinfo = &_peinfo;
        cli_exe_info_init(peinfo, 0);

-        if (CL_SUCCESS != cli_peheader(ctx->fmap, peinfo, CLI_PEHEADER_OPT_NONE, NULL)) {
+        if (CL_SUCCESS != cli_peheader(ctx, peinfo, CLI_PEHEADER_OPT_NONE)) {
            cli_exe_info_destroy(peinfo);
            return CL_EFORMAT;
        }
@ -5741,7 +5744,7 @@ cl_error_t cli_genhash_pe(cli_ctx *ctx, unsigned int class, cli_hash_type_t type
    // if so, use that to avoid having to re-parse the header
    cli_exe_info_init(peinfo, 0);

-    if (cli_peheader(ctx->fmap, peinfo, CLI_PEHEADER_OPT_NONE, NULL) != CL_SUCCESS) {
+    if (cli_peheader(ctx, peinfo, CLI_PEHEADER_OPT_NONE) != CL_SUCCESS) {
        cli_exe_info_destroy(peinfo);
        return CL_EFORMAT;
    }
--- a/libclamav/pe.h
+++ b/libclamav/pe.h
@ -88,7 +88,7 @@ enum {
 #define CLI_PEHEADER_OPT_REMOVE_MISSING_SECTIONS 0x10

 cl_error_t cli_pe_targetinfo(cli_ctx *ctx, struct cli_exe_info *peinfo);
-cl_error_t cli_peheader(fmap_t *map, struct cli_exe_info *peinfo, uint32_t opts, cli_ctx *ctx);
+cl_error_t cli_peheader(cli_ctx *ctx, struct cli_exe_info *peinfo, uint32_t opts);

 cl_error_t cli_check_auth_header(cli_ctx *ctx, struct cli_exe_info *peinfo);
 cl_error_t cli_genhash_pe(cli_ctx *ctx, unsigned int class, cli_hash_type_t type, stats_section_t *hashes);
--- a/libclamav/scanners.c
+++ b/libclamav/scanners.c
@ -418,11 +418,7 @@ static cl_error_t cli_scanrar_file(const char *filepath, int desc, cli_ctx *ctx)
                     * File should be extracted...
                     * ... make sure we have read permissions to the file.
                     */
-#ifdef _WIN32
-                    if (0 != _access_s(extract_fullpath, R_OK)) {
-#else
                    if (0 != access(extract_fullpath, R_OK)) {
-#endif
                        cli_dbgmsg("RAR: Don't have read permissions, attempting to change file permissions to make it readable..\n");
 #ifdef _WIN32
                        if (0 != _chmod(extract_fullpath, _S_IREAD)) {
@ -533,11 +529,11 @@ static cl_error_t cli_scanrar(cli_ctx *ctx)
    char *tmpname = NULL;
    int tmpfd     = -1;

-#ifdef _WIN32
-    if ((SCAN_UNPRIVILEGED) || (NULL == ctx->fmap->path) || (0 != _access_s(ctx->fmap->path, R_OK))) {
-#else
-    if ((SCAN_UNPRIVILEGED) || (NULL == ctx->fmap->path) || (0 != access(ctx->fmap->path, R_OK))) {
-#endif
+    if ((SCAN_UNPRIVILEGED) ||
+        (NULL == ctx->fmap->path) ||
+        (0 != access(ctx->fmap->path, R_OK)) ||
+        (ctx->fmap->nested_offset > 0) || (ctx->fmap->len < ctx->fmap->real_len)) {
+
        /* If map is not file-backed have to dump to file for scanrar. */
        status = fmap_dump_to_file(ctx->fmap, ctx->fmap->path, ctx->this_layer_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
        if (status != CL_SUCCESS) {
@ -1665,7 +1661,7 @@ static cl_error_t cli_ole2_tempdir_scan_vba_new(const char *dir, cli_ctx *ctx, s
                goto done;
            }

-            ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, false, NULL, AC_SCAN_VIR, NULL, "extracted-vba-project", tempfile, LAYER_ATTRIBUTES_NORMALIZED);
+            ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, false, NULL, AC_SCAN_VIR, NULL, "extracted-vba-project", tempfile, LAYER_ATTRIBUTES_NONE);
            if (CL_SUCCESS != ret) {
                goto done;
            }
@ -3650,7 +3646,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
        // Omit OLD TAR files because it's a raw archive format that we can extract and scan manually.
        (type != CL_TYPE_OLD_TAR) &&
        // Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually.
-        (type != CL_TYPE_POSIX_TAR)) {
+        (type != CL_TYPE_POSIX_TAR) &&
+        // Omit TNEF files because TNEF message attachments are raw / not compressed. Document and ZIP attachments would be likely to have double-extraction issues.
+        (type != CL_TYPE_TNEF)) {
        /*
         * Enable file type recognition scan mode if requested, except for some problematic types (above).
         */
@ -3668,6 +3666,8 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
    // In allmatch-mode, ret will never be CL_VIRUS, so ret may be used exclusively for file type detection and for terminal errors.
    // When not in allmatch-mode, it's more important to return right away if ret is CL_VIRUS, so we don't care if file type matches were found.
    if (ret >= CL_TYPENO) {
+        size_t last_offset = 0;
+
        // Matched 1+ file type signatures. Handle them.
        found_type = (cli_file_t)ret;

@ -3676,11 +3676,16 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
        fpt = ftoffset;

        while (fpt) {
-            if (fpt->offset > 0) {
+            if ((fpt->offset > 0) &&
+                // Only handle each offset once to prevent duplicate processing like if two signatures are found at the same offset.
+                ((size_t)fpt->offset > last_offset)) {
+
                bool type_has_been_handled = true;
                bool ancestor_was_embedded = false;
                size_t i;

+                last_offset = (size_t)fpt->offset;
+
                /*
                 * First, use "embedded type recognition" to identify a file's actual type.
                 * (a.k.a. not embedded files, but file type detection corrections)
@ -3871,84 +3876,10 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
                    break;
                }

-                /*
-                 * Only scan embedded files if we are not already in an embedded context.
-                 * That is, if this or a previous layer was identified with embedded file type recognition, then we do
-                 * not scan for embedded files again.
-                 *
-                 * This restriction will prevent detecting the same embedded content more than once when recursing with
-                 * embedded file type recognition deeper within the same buffer.
-                 *
-                 * This is necessary because we have no way of knowing the length of a file for many formats and cannot
-                 * prevent a search for embedded files from finding the same embedded content multiple times (like a LOT
-                 * of times).
-                 *
-                 * E.g. if the file is like this:
-                 *
-                 *          [ data ] [ embedded file ] [ data ] [ embedded file ]
-                 *
-                 * The first time we do it we'll find "two" embedded files, like this:
-                 *
-                 *  Emb. File #1:    [ embedded file ] [ data ] [ embedded file ]
-                 *  Emb. File #2:                               [ embedded file ]
-                 *
-                 * We must not scan Emb. File #1 again for embedded files, because it would double-extract Emb. File #2.
-                 *
-                 * There is a flaw in this logic, though. Suppose that we actually have:
-                 *
-                 *          [ data ] [ compressed file w. recognizable magic bytes ]
-                 *
-                 * A first pass of the above will again identify "two" embedded files:
-                 *
-                 *  Emb. File #1:    [ compressed archive w. recognizable magic bytes ]
-                 *  Emb. File #2:                               [ magic bytes         ] <- Compressed data/Not real file
-                 *
-                 * In this case, the magic bytes of a contained, compressed file is somehow still identifiable despite
-                 * compression. The result is the Emb. File #2 will fail to be parsed and when we decompress Emb. File
-                 * #1, then we maybe get something like this:
-                 *
-                 *  Decompressed:    [ data                   ] [ embedded file ]
-                 *
-                 * So if this happened... then we WOULD want to scan the decompressed file for embedded files.
-                 * The problem is, we have no way of knowing how long embedded files are.
-                 * We don't know if we have:
-                 *
-                 * A.       [ data ] [ embedded file ] [ data ] [ embedded file ]
-                 *  or
-                 * B.       [ data ] [ embedded compressed archive w. recognizable magic bytes ]
-                 *  or
-                 * C.       [ data ] [ embedded uncompressed archive w. multiple file entries [ file 1 ] [ file 2 ] [ file 2 ] ]
-                 *
-                 * Some ideas for a more accurate solution:
-                 *
-                 * 1. Record the offset and size of each file extracted by the parsers.
-                 *    Then, when we do embedded file type recognition, we can check if the offset and size of the
-                 *    embedded file matches the offset and size of a file that was extracted by a parser.
-                 *    This falls apart a little bit for multiple layers of archives unless we also compare offsets within
-                 *    each layer. We could do that, but it would be a lot of work. And we'd probably want to take into
-                 *    consideration if files were decompressed or decrypted. ... I don't know a clean solution.
-                 *
-                 * 2. Have all parsers to run before embedded file type recognition and they each determine the length
-                 *    of the file they parsed, so we can differentiate between embedded files and appended files.
-                 *    For appended files, we would know they weren't extracted by a parser module and the parser for
-                 *    each of those would report the length of the file it parsed so we can use that to mitigate
-                 *    overlapping embedded file type recognition.
-                 *    But I highly doubt all file types can be parsed to determine the correct length of the file.
-                 */
-                for (i = ctx->recursion_level; i > 0; i--) {
-                    if (ctx->recursion_stack[i].attributes & LAYER_ATTRIBUTES_EMBEDDED) {
-                        // Found an ancestor that was embedded.
-                        // Do not scan embedded files again.
-                        ancestor_was_embedded = true;
-                        break;
-                    }
-                }
-
                /*
                 * Next, check for actual embedded files.
                 */
-                if ((false == ancestor_was_embedded) &&
-                    (false == type_has_been_handled)) {
+                if (false == type_has_been_handled) {
                    cli_dbgmsg("%s signature found at %u\n", cli_ftname(fpt->type), (unsigned int)fpt->offset);

                    type_has_been_handled = true;
@ -4001,6 +3932,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
                                    break;
                                }

+                                // Increment last_offset to ignore any file type matches that occured within this legitimate archive.
+                                last_offset += zip_size - 1; // Note: size is definitely > 0 because header_check succeeded.
+
                                nret = cli_magic_scan_nested_fmap_type(
                                    ctx->fmap,
                                    fpt->offset,
@ -4023,6 +3957,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
                                    break;
                                }

+                                // Increment last_offset to ignore any file type matches that occured within this legitimate archive.
+                                last_offset += cab_size - 1; // Note: size is definitely > 0 because header_check succeeded.
+
                                nret = cli_magic_scan_nested_fmap_type(
                                    ctx->fmap,
                                    fpt->offset,
@ -4046,6 +3983,9 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
                                    break;
                                }

+                                // Increment last_offset to ignore any file type matches that occured within this legitimate archive.
+                                last_offset += arj_size - 1; // Note: size is definitely > 0 because header_check succeeded.
+
                                nret = cli_magic_scan_nested_fmap_type(
                                    ctx->fmap,
                                    fpt->offset,
@ -4143,10 +4083,10 @@ static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_fi
                                    break;
                                }

-                                cli_exe_info_init(&peinfo, 0);
+                                cli_exe_info_init(&peinfo, fpt->offset);

                                // Header validity check to prevent false positives from being scanned.
-                                ret = cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL);
+                                ret = cli_peheader(ctx, &peinfo, CLI_PEHEADER_OPT_NONE);

                                // peinfo memory may have been allocated and must be freed even if it failed.
                                cli_exe_info_destroy(&peinfo);
--- a/libclamav/unzip.c
+++ b/libclamav/unzip.c
@ -717,17 +717,35 @@ static cl_error_t parse_local_file_header(
    zip += LOCAL_HEADER_elen;
    bytes_remaining -= LOCAL_HEADER_elen;

-    if (!csize) { /* FIXME: what's used for method0 files? csize or usize? Nothing in the specs, needs testing */
-        cli_dbgmsg("cli_unzip: local header - skipping empty file\n");
-    } else {
-        if (bytes_remaining < csize) {
-            cli_dbgmsg("cli_unzip: local header - stream out of file\n");
-            status = CL_EPARSE;
-            goto done;
-        }
+    if (bytes_remaining < csize) {
+        cli_dbgmsg("cli_unzip: local header - stream out of file\n");
+        status = CL_EPARSE;
+        goto done;
+    }

+    if (NULL != record) {
        /* Don't actually unzip if we're just collecting the file record information (offset, sizes) */
-        if (NULL == record) {
+        if (NULL == original_filename) {
+            record->original_filename = NULL;
+        } else {
+            record->original_filename = CLI_STRNDUP(original_filename, strlen(original_filename));
+        }
+        record->local_header_offset = loff;
+        record->local_header_size   = zip - local_header;
+        record->compressed_size     = csize;
+        record->uncompressed_size   = usize;
+        record->method              = LOCAL_HEADER_method;
+        record->flags               = LOCAL_HEADER_flags;
+        record->encrypted           = (LOCAL_HEADER_flags & F_ENCR) ? 1 : 0;
+
+        status = CL_SUCCESS;
+    } else {
+        /*
+         * Unzip or decompress & then unzip.
+         */
+        if (!csize) { /* FIXME: what's used for method0 files? csize or usize? Nothing in the specs, needs testing */
+            cli_dbgmsg("cli_unzip: local header - skipping empty file\n");
+        } else {
            zip = fmap_need_ptr_once(ctx->fmap, zip, csize);
            if (NULL == zip) {
                cli_dbgmsg("cli_unzip: local header - data out of file\n");
@ -751,27 +769,12 @@ static cl_error_t parse_local_file_header(
                    goto done;
                }
            }
-        } else {
-            if (NULL == original_filename) {
-                record->original_filename = NULL;
-            } else {
-                record->original_filename = CLI_STRNDUP(original_filename, strlen(original_filename));
-            }
-            record->local_header_offset = loff;
-            record->local_header_size   = zip - local_header;
-            record->compressed_size     = csize;
-            record->uncompressed_size   = usize;
-            record->method              = LOCAL_HEADER_method;
-            record->flags               = LOCAL_HEADER_flags;
-            record->encrypted           = (LOCAL_HEADER_flags & F_ENCR) ? 1 : 0;
-
-            status = CL_SUCCESS;
        }
-
-        zip += csize;
-        bytes_remaining -= csize;
    }

+    zip += csize;
+    bytes_remaining -= csize;
+
    if (LOCAL_HEADER_flags & F_USEDD) {
        if (bytes_remaining < 12) {
            cli_dbgmsg("cli_unzip: local header - data desc out of file\n");
@ -910,8 +913,8 @@ static cl_error_t parse_central_directory_file_header(

    central_header = fmap_need_off(ctx->fmap, central_file_header_offset, SIZEOF_CENTRAL_HEADER);
    if (NULL == central_header) {
-        cli_dbgmsg("cli_unzip: central header - file header offset out of file\n");
-        status = CL_EPARSE;
+        cli_dbgmsg("cli_unzip: central header - reached end of central directory.\n");
+        status = CL_BREAK;
        goto done;
    }

@ -1310,7 +1313,8 @@ cl_error_t index_local_file_headers_within_bounds(
    index             = *num_records;

    if (start_offset > fsize || end_offset > fsize || start_offset > end_offset) {
-        cli_errmsg("index_local_file_headers_within_bounds: Invalid offset arguments\n");
+        cli_errmsg("index_local_file_headers_within_bounds: Invalid offset arguments: start_offset=%u, end_offset=%u, fsize=%u\n",
+                   start_offset, end_offset, fsize);
        status = CL_EPARSE;
        goto done;
    }
@ -2075,6 +2079,9 @@ cl_error_t unzip_search(cli_ctx *ctx, struct zip_requests *requests)
                status = CL_ETIMEOUT;
                goto done;
            }
+
+            // Increment to the next central file header.
+            central_file_header_offset += file_record_size;
        } while ((ret == CL_SUCCESS) && (file_record_size > 0));
    } else {
        cli_dbgmsg("unzip_search: Cannot locate central directory. unzip_search failed.\n");
@ -2106,7 +2113,7 @@ cl_error_t unzip_search_single(cli_ctx *ctx, const char *name, size_t nlen, uint

    // Search for the zip file entry in the current layer.
    status = unzip_search(ctx, &requests);
-    if (CL_SUCCESS == status) {
+    if (CL_VIRUS == status) {
        *loff = requests.loff;
    }

--- a/libclamav/xlm_extract.c
+++ b/libclamav/xlm_extract.c
@ -4950,7 +4950,7 @@ cl_error_t cli_extract_xlm_macros_and_images(const char *dir, cli_ctx *ctx, char
    }

    if (CL_VIRUS == cli_scan_desc(out_fd, ctx, CL_TYPE_SCRIPT, false, NULL, AC_SCAN_VIR,
-                                  NULL, "xlm-macro", tempfile, LAYER_ATTRIBUTES_NORMALIZED)) {
+                                  NULL, "xlm-macro", tempfile, LAYER_ATTRIBUTES_NONE)) {
        status = CL_VIRUS;
        goto done;
    }
--- a/unit_tests/valgrind.supp
+++ b/unit_tests/valgrind.supp
@ -376,3 +376,12 @@
   fun:start_thread
   fun:clone
 }
+{
+   <image-fuzzy-hash-png-decoder>
+   Memcheck:Cond
+   ...
+   fun:read_header_info<std::io::cursor::Cursor<&[u8]>>
+   ...
+   fun:fuzzy_hash_calculate_image
+   ...
+}