2007-12-13 19:47:07 +00:00
|
|
|
/*
|
2025-02-14 10:24:30 -05:00
|
|
|
* Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
2013-03-12 13:13:14 -04:00
|
|
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
2008-04-02 15:24:51 +00:00
|
|
|
*
|
|
|
|
* Authors: Alberto Wu
|
2007-12-13 19:47:07 +00:00
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
* MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* FIXME: get a clue about masked stuff */
|
|
|
|
|
|
|
|
#if HAVE_CONFIG_H
|
|
|
|
#include "clamav-config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif
|
|
|
|
#if HAVE_STRING_H
|
|
|
|
#include <string.h>
|
|
|
|
#endif
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#include <zlib.h>
|
|
|
|
#include "inflate64.h"
|
2024-03-25 11:33:56 -04:00
|
|
|
|
2007-12-13 19:47:07 +00:00
|
|
|
#include <bzlib.h>
|
|
|
|
|
2008-01-01 23:09:15 +00:00
|
|
|
#include "explode.h"
|
2007-12-13 19:47:07 +00:00
|
|
|
#include "others.h"
|
|
|
|
#include "clamav.h"
|
|
|
|
#include "scanners.h"
|
|
|
|
#include "matcher.h"
|
2009-08-24 17:02:27 +02:00
|
|
|
#include "fmap.h"
|
2014-06-25 13:08:18 -04:00
|
|
|
#include "json_api.h"
|
2020-03-19 21:23:54 -04:00
|
|
|
#include "str.h"
|
2007-12-13 19:47:07 +00:00
|
|
|
|
|
|
|
#define UNZIP_PRIVATE
|
|
|
|
#include "unzip.h"
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
// clang-format off
|
|
|
|
#define ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_BEGIN (0x02014b50)
|
|
|
|
#define ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_END (0x06054b50)
|
|
|
|
#define ZIP_MAGIC_LOCAL_FILE_HEADER (0x04034b50)
|
|
|
|
#define ZIP_MAGIC_FILE_BEGIN_SPLIT_OR_SPANNED (0x08074b50)
|
|
|
|
// clang-format on
|
|
|
|
|
2022-07-18 11:38:00 -07:00
|
|
|
// Non-malicious zips in enterprise critical JAR-ZIPs have been observed with a 1-byte overlap.
|
|
|
|
// The goal with overlap detection is to alert on non-recursive zip bombs, so this tiny overlap isn't a concern.
|
|
|
|
// We'll allow a 2-byte overlap so we don't alert on such zips.
|
|
|
|
#define ZIP_RECORD_OVERLAP_FUDGE_FACTOR 2
|
2019-09-08 23:17:43 -04:00
|
|
|
#define ZIP_MAX_NUM_OVERLAPPING_FILES 5
|
2019-07-11 22:14:39 -04:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
#define ZIP_CRC32(r, c, b, l) \
|
|
|
|
do { \
|
|
|
|
r = crc32(~c, b, l); \
|
|
|
|
r = ~r; \
|
|
|
|
} while (0)
|
2015-07-14 17:25:01 -04:00
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
#define ZIP_RECORDS_CHECK_BLOCKSIZE 100
|
|
|
|
struct zip_record {
|
2019-09-08 23:17:43 -04:00
|
|
|
uint32_t local_header_offset;
|
2019-08-30 14:48:57 -04:00
|
|
|
uint32_t local_header_size;
|
2019-09-08 23:17:43 -04:00
|
|
|
uint32_t compressed_size;
|
|
|
|
uint32_t uncompressed_size;
|
|
|
|
uint16_t method;
|
|
|
|
uint16_t flags;
|
|
|
|
int encrypted;
|
2020-03-19 21:23:54 -04:00
|
|
|
char *original_filename;
|
2019-08-30 14:48:57 -04:00
|
|
|
};
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
static int wrap_inflateinit2(void *a, int b)
|
|
|
|
{
|
|
|
|
return inflateInit2(a, b);
|
2007-12-13 19:47:07 +00:00
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
/**
|
|
|
|
* @brief uncompress file from zip
|
|
|
|
*
|
2019-07-11 22:14:39 -04:00
|
|
|
* @param src pointer to compressed data
|
|
|
|
* @param csize size of compressed data
|
|
|
|
* @param usize expected size of uncompressed data
|
|
|
|
* @param method compression method
|
|
|
|
* @param flags local header flags
|
|
|
|
* @param[in,out] num_files_unzipped current number of files that have been unzipped
|
|
|
|
* @param[in,out] ctx scan context
|
|
|
|
* @param tmpd temp directory path name
|
|
|
|
* @param zcb callback function to invoke after extraction (default: scan)
|
|
|
|
* @return cl_error_t CL_EPARSE = could not apply a password
|
2019-07-11 20:20:48 -04:00
|
|
|
*/
|
|
|
|
static cl_error_t unz(
|
|
|
|
const uint8_t *src,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
uint64_t csize,
|
|
|
|
uint64_t usize,
|
2019-07-11 20:20:48 -04:00
|
|
|
uint16_t method,
|
|
|
|
uint16_t flags,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t *num_files_unzipped,
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_ctx *ctx,
|
|
|
|
char *tmpd,
|
2020-03-19 21:23:54 -04:00
|
|
|
zip_cb zcb,
|
2022-03-09 22:26:40 -08:00
|
|
|
const char *original_filename,
|
|
|
|
bool decrypted)
|
2018-12-03 12:40:13 -05:00
|
|
|
{
|
2022-05-16 21:29:25 -04:00
|
|
|
char obuf[BUFSIZ] = {0};
|
|
|
|
char *tempfile = NULL;
|
2025-03-22 22:41:29 -04:00
|
|
|
int out_file, ret = CL_SUCCESS;
|
2019-05-04 15:54:54 -04:00
|
|
|
int res = 1;
|
|
|
|
size_t written = 0;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
if (tmpd) {
|
2020-03-19 21:23:54 -04:00
|
|
|
if (ctx->engine->keeptmp && (NULL != original_filename)) {
|
|
|
|
if (!(tempfile = cli_gentemp_with_prefix(tmpd, original_filename))) return CL_EMEM;
|
|
|
|
} else {
|
|
|
|
if (!(tempfile = cli_gentemp(tmpd))) return CL_EMEM;
|
|
|
|
}
|
2007-12-13 19:47:07 +00:00
|
|
|
} else {
|
2020-03-19 21:23:54 -04:00
|
|
|
if (ctx->engine->keeptmp && (NULL != original_filename)) {
|
libclamav: Add engine option to toggle temp directory recursion
Temp directory recursion in ClamAV is when each layer of a scan gets its
own temp directory in the parent layer's temp directory.
In addition to temp directory recursion, ClamAV has been creating a new
subdirectory for each file scan as a risk-adverse method to ensure
no temporary file leaks fill up the disk.
Creating a directory is relatively slow on Windows in particular if
scanning a lot of very small files.
This commit:
1. Separates the temp directory recursion feature from the leave-temps
feature so that libclamav can leave temp files without making
subdirectories for each file scanned.
2. Makes it so that when temp directory recursion is off, libclamav
will just use the configure temp directory for all files.
The new option to enable temp directory recursion is for libclamav-only
at this time. It is off by default, and you can enable it like this:
```c
cl_engine_set_num(engine, CL_ENGINE_TMPDIR_RECURSION, 1);
```
For the `clamscan` and `clamd` programs, temp directory recursion will
be enabled when `--leave-temps` / `LeaveTemporaryFiles` is enabled.
The difference is that when disabled, it will return to using the
configured temp directory without making a subdirectory for each file
scanned, so as to improve scan performance for small files, mostly on
Windows.
Under the hood, this commit also:
1. Cleans up how we keep track of tmpdirs for each layer.
The goal here is to align how we keep track of layer-specific stuff
using the scan_layer structure.
2. Cleans up how we record metadata JSON for embedded files.
Note: Embedded files being different from Contained files, as they
are extracted not with a parser, but by finding them with
file type magic signatures.
CLAM-1583
2025-06-09 20:42:31 -04:00
|
|
|
if (!(tempfile = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, original_filename))) return CL_EMEM;
|
2020-03-19 21:23:54 -04:00
|
|
|
} else {
|
libclamav: Add engine option to toggle temp directory recursion
Temp directory recursion in ClamAV is when each layer of a scan gets its
own temp directory in the parent layer's temp directory.
In addition to temp directory recursion, ClamAV has been creating a new
subdirectory for each file scan as a risk-adverse method to ensure
no temporary file leaks fill up the disk.
Creating a directory is relatively slow on Windows in particular if
scanning a lot of very small files.
This commit:
1. Separates the temp directory recursion feature from the leave-temps
feature so that libclamav can leave temp files without making
subdirectories for each file scanned.
2. Makes it so that when temp directory recursion is off, libclamav
will just use the configure temp directory for all files.
The new option to enable temp directory recursion is for libclamav-only
at this time. It is off by default, and you can enable it like this:
```c
cl_engine_set_num(engine, CL_ENGINE_TMPDIR_RECURSION, 1);
```
For the `clamscan` and `clamd` programs, temp directory recursion will
be enabled when `--leave-temps` / `LeaveTemporaryFiles` is enabled.
The difference is that when disabled, it will return to using the
configured temp directory without making a subdirectory for each file
scanned, so as to improve scan performance for small files, mostly on
Windows.
Under the hood, this commit also:
1. Cleans up how we keep track of tmpdirs for each layer.
The goal here is to align how we keep track of layer-specific stuff
using the scan_layer structure.
2. Cleans up how we record metadata JSON for embedded files.
Note: Embedded files being different from Contained files, as they
are extracted not with a parser, but by finding them with
file type magic signatures.
CLAM-1583
2025-06-09 20:42:31 -04:00
|
|
|
if (!(tempfile = cli_gentemp(ctx->this_layer_tmpdir))) return CL_EMEM;
|
2020-03-19 21:23:54 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-07-11 20:20:48 -04:00
|
|
|
if ((out_file = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_warnmsg("cli_unzip: failed to create temporary file %s\n", tempfile);
|
2020-03-19 21:23:54 -04:00
|
|
|
free(tempfile);
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_ETMPFILE;
|
|
|
|
}
|
|
|
|
switch (method) {
|
|
|
|
case ALG_STORED:
|
|
|
|
if (csize < usize) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t fake = *num_files_unzipped + 1;
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: attempting to inflate stored file with inconsistent size\n");
|
2025-03-22 22:41:29 -04:00
|
|
|
if (CL_SUCCESS == (ret = unz(src, csize, usize, ALG_DEFLATE, 0, &fake, ctx,
|
|
|
|
tmpd, zcb, original_filename, decrypted))) {
|
2019-07-11 20:20:48 -04:00
|
|
|
(*num_files_unzipped)++;
|
|
|
|
res = fake - (*num_files_unzipped);
|
2018-12-03 12:40:13 -05:00
|
|
|
} else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (res == 1) {
|
|
|
|
if (ctx->engine->maxfilesize && csize > ctx->engine->maxfilesize) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (" STDu64 ")\n",
|
|
|
|
ctx->engine->maxfilesize);
|
2018-12-03 12:40:13 -05:00
|
|
|
csize = ctx->engine->maxfilesize;
|
|
|
|
}
|
2019-07-11 20:20:48 -04:00
|
|
|
if (cli_writen(out_file, src, csize) != csize)
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EWRITE;
|
|
|
|
else
|
|
|
|
res = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ALG_DEFLATE:
|
|
|
|
case ALG_DEFLATE64: {
|
|
|
|
union {
|
|
|
|
z_stream64 strm64;
|
|
|
|
z_stream strm;
|
|
|
|
} strm;
|
|
|
|
typedef int (*unz_init_)(void *, int);
|
|
|
|
typedef int (*unz_unz_)(void *, int);
|
|
|
|
typedef int (*unz_end_)(void *);
|
|
|
|
unz_init_ unz_init;
|
|
|
|
unz_unz_ unz_unz;
|
|
|
|
unz_end_ unz_end;
|
|
|
|
int wbits;
|
|
|
|
void **next_in;
|
|
|
|
void **next_out;
|
|
|
|
unsigned int *avail_in;
|
|
|
|
unsigned int *avail_out;
|
|
|
|
|
|
|
|
if (method == ALG_DEFLATE64) {
|
|
|
|
unz_init = (unz_init_)inflate64Init2;
|
|
|
|
unz_unz = (unz_unz_)inflate64;
|
|
|
|
unz_end = (unz_end_)inflate64End;
|
|
|
|
next_in = (void *)&strm.strm64.next_in;
|
|
|
|
next_out = (void *)&strm.strm64.next_out;
|
|
|
|
avail_in = &strm.strm64.avail_in;
|
|
|
|
avail_out = &strm.strm64.avail_out;
|
|
|
|
wbits = MAX_WBITS64;
|
|
|
|
} else {
|
|
|
|
unz_init = (unz_init_)wrap_inflateinit2;
|
|
|
|
unz_unz = (unz_unz_)inflate;
|
|
|
|
unz_end = (unz_end_)inflateEnd;
|
|
|
|
next_in = (void *)&strm.strm.next_in;
|
|
|
|
next_out = (void *)&strm.strm.next_out;
|
|
|
|
avail_in = &strm.strm.avail_in;
|
|
|
|
avail_out = &strm.strm.avail_out;
|
|
|
|
wbits = MAX_WBITS;
|
|
|
|
}
|
2007-12-13 19:47:07 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
memset(&strm, 0, sizeof(strm));
|
|
|
|
|
|
|
|
*next_in = (void *)src;
|
|
|
|
*next_out = obuf;
|
|
|
|
*avail_in = csize;
|
|
|
|
*avail_out = sizeof(obuf);
|
|
|
|
if (unz_init(&strm, -wbits) != Z_OK) {
|
|
|
|
cli_dbgmsg("cli_unzip: zinit failed\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
while (1) {
|
|
|
|
while ((res = unz_unz(&strm, Z_NO_FLUSH)) == Z_OK) {
|
|
|
|
};
|
|
|
|
if (*avail_out != sizeof(obuf)) {
|
|
|
|
written += sizeof(obuf) - (*avail_out);
|
|
|
|
if (ctx->engine->maxfilesize && written > ctx->engine->maxfilesize) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (" STDu64 ")\n",
|
|
|
|
ctx->engine->maxfilesize);
|
2018-12-03 12:40:13 -05:00
|
|
|
res = Z_STREAM_END;
|
|
|
|
break;
|
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (cli_writen(out_file, obuf, sizeof(obuf) - *avail_out) != sizeof(obuf) - *avail_out) {
|
|
|
|
cli_warnmsg("cli_unzip: failed to write %zu inflated bytes\n",
|
|
|
|
sizeof(obuf) - *avail_out);
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EWRITE;
|
|
|
|
res = 100;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
*next_out = obuf;
|
|
|
|
*avail_out = sizeof(obuf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
unz_end(&strm);
|
|
|
|
if ((res == Z_STREAM_END) | (res == Z_BUF_ERROR)) res = 0;
|
|
|
|
break;
|
|
|
|
}
|
2007-12-13 19:47:07 +00:00
|
|
|
|
2008-04-28 15:28:04 +00:00
|
|
|
#ifdef NOBZ2PREFIX
|
2008-04-29 17:54:01 +00:00
|
|
|
#define BZ2_bzDecompress bzDecompress
|
|
|
|
#define BZ2_bzDecompressEnd bzDecompressEnd
|
|
|
|
#define BZ2_bzDecompressInit bzDecompressInit
|
2008-04-28 15:28:04 +00:00
|
|
|
#endif
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
case ALG_BZIP2: {
|
|
|
|
bz_stream strm;
|
|
|
|
memset(&strm, 0, sizeof(strm));
|
|
|
|
strm.next_in = (char *)src;
|
|
|
|
strm.next_out = obuf;
|
|
|
|
strm.avail_in = csize;
|
|
|
|
strm.avail_out = sizeof(obuf);
|
|
|
|
if (BZ2_bzDecompressInit(&strm, 0, 0) != BZ_OK) {
|
|
|
|
cli_dbgmsg("cli_unzip: bzinit failed\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
while ((res = BZ2_bzDecompress(&strm)) == BZ_OK || res == BZ_STREAM_END) {
|
|
|
|
if (strm.avail_out != sizeof(obuf)) {
|
|
|
|
written += sizeof(obuf) - strm.avail_out;
|
|
|
|
if (ctx->engine->maxfilesize && written > ctx->engine->maxfilesize) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (" STDu64 ")\n", ctx->engine->maxfilesize);
|
2018-12-03 12:40:13 -05:00
|
|
|
res = BZ_STREAM_END;
|
|
|
|
break;
|
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (cli_writen(out_file, obuf, sizeof(obuf) - strm.avail_out) != sizeof(obuf) - strm.avail_out) {
|
|
|
|
cli_warnmsg("cli_unzip: failed to write %zu bunzipped bytes\n", sizeof(obuf) - strm.avail_out);
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EWRITE;
|
|
|
|
res = 100;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
strm.next_out = obuf;
|
|
|
|
strm.avail_out = sizeof(obuf);
|
|
|
|
if (res == BZ_OK) continue; /* after returning BZ_STREAM_END once, decompress returns an error */
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
BZ2_bzDecompressEnd(&strm);
|
|
|
|
if (res == BZ_STREAM_END) res = 0;
|
|
|
|
break;
|
|
|
|
}
|
2007-12-13 19:47:07 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
case ALG_IMPLODE: {
|
|
|
|
struct xplstate strm;
|
|
|
|
strm.next_in = (void *)src;
|
|
|
|
strm.next_out = (uint8_t *)obuf;
|
|
|
|
strm.avail_in = csize;
|
|
|
|
strm.avail_out = sizeof(obuf);
|
|
|
|
if (explode_init(&strm, flags) != EXPLODE_OK) {
|
|
|
|
cli_dbgmsg("cli_unzip: explode_init() failed\n");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
while ((res = explode(&strm)) == EXPLODE_OK) {
|
|
|
|
if (strm.avail_out != sizeof(obuf)) {
|
|
|
|
written += sizeof(obuf) - strm.avail_out;
|
|
|
|
if (ctx->engine->maxfilesize && written > ctx->engine->maxfilesize) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: trimming output size to maxfilesize (" STDu64 ")\n", ctx->engine->maxfilesize);
|
2018-12-03 12:40:13 -05:00
|
|
|
res = 0;
|
|
|
|
break;
|
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (cli_writen(out_file, obuf, sizeof(obuf) - strm.avail_out) != sizeof(obuf) - strm.avail_out) {
|
|
|
|
cli_warnmsg("cli_unzip: failed to write %zu exploded bytes\n", sizeof(obuf) - strm.avail_out);
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EWRITE;
|
|
|
|
res = 100;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
strm.next_out = (uint8_t *)obuf;
|
|
|
|
strm.avail_out = sizeof(obuf);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2008-01-01 23:09:15 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
case ALG_LZMA:
|
|
|
|
/* easy but there's not a single sample in the zoo */
|
2007-12-13 19:47:07 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
case ALG_SHRUNK:
|
|
|
|
case ALG_REDUCE1:
|
|
|
|
case ALG_REDUCE2:
|
|
|
|
case ALG_REDUCE3:
|
|
|
|
case ALG_REDUCE4:
|
|
|
|
case ALG_TOKENZD:
|
|
|
|
case ALG_OLDTERSE:
|
|
|
|
case ALG_RSVD1:
|
|
|
|
case ALG_RSVD2:
|
|
|
|
case ALG_RSVD3:
|
|
|
|
case ALG_RSVD4:
|
|
|
|
case ALG_RSVD5:
|
|
|
|
case ALG_NEWTERSE:
|
|
|
|
case ALG_LZ77:
|
|
|
|
case ALG_WAVPACK:
|
|
|
|
case ALG_PPMD:
|
|
|
|
cli_dbgmsg("cli_unzip: unsupported method (%d)\n", method);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
cli_dbgmsg("cli_unzip: unknown method (%d)\n", method);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!res) {
|
2019-07-11 20:20:48 -04:00
|
|
|
(*num_files_unzipped)++;
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: extracted to %s\n", tempfile);
|
2019-07-11 20:20:48 -04:00
|
|
|
if (lseek(out_file, 0, SEEK_SET) == -1) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: call to lseek() failed\n");
|
2020-03-19 21:23:54 -04:00
|
|
|
free(tempfile);
|
2019-07-11 20:20:48 -04:00
|
|
|
close(out_file);
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_ESEEK;
|
|
|
|
}
|
2022-03-09 22:26:40 -08:00
|
|
|
ret = zcb(out_file, tempfile, ctx, original_filename, decrypted);
|
2019-07-11 20:20:48 -04:00
|
|
|
close(out_file);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!ctx->engine->keeptmp)
|
|
|
|
if (cli_unlink(tempfile)) ret = CL_EUNLINK;
|
2020-03-19 21:23:54 -04:00
|
|
|
free(tempfile);
|
2018-12-03 12:40:13 -05:00
|
|
|
return ret;
|
2013-02-28 21:01:40 -05:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
close(out_file);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!ctx->engine->keeptmp)
|
|
|
|
if (cli_unlink(tempfile)) ret = CL_EUNLINK;
|
2020-03-19 21:23:54 -04:00
|
|
|
free(tempfile);
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: extraction failed\n");
|
2007-12-13 19:47:07 +00:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2015-07-14 18:26:22 -04:00
|
|
|
/* zip update keys, taken from zip specification */
|
|
|
|
static inline void zupdatekey(uint32_t key[3], unsigned char input)
|
|
|
|
{
|
|
|
|
unsigned char tmp[1];
|
|
|
|
|
|
|
|
tmp[0] = input;
|
|
|
|
ZIP_CRC32(key[0], key[0], tmp, 1);
|
|
|
|
|
|
|
|
key[1] = key[1] + (key[0] & 0xff);
|
|
|
|
key[1] = key[1] * 134775813 + 1;
|
|
|
|
|
|
|
|
tmp[0] = key[1] >> 24;
|
|
|
|
ZIP_CRC32(key[2], key[2], tmp, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* zip init keys */
|
2015-07-20 15:00:18 -04:00
|
|
|
static inline void zinitkey(uint32_t key[3], struct cli_pwdb *password)
|
2015-07-14 18:26:22 -04:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2018-02-21 15:00:59 -05:00
|
|
|
/* initialize keys, these are specified but the zip specification */
|
2015-07-14 18:26:22 -04:00
|
|
|
key[0] = 305419896L;
|
|
|
|
key[1] = 591751049L;
|
|
|
|
key[2] = 878082192L;
|
|
|
|
|
|
|
|
/* update keys with password */
|
|
|
|
for (i = 0; i < password->length; i++)
|
2018-12-03 12:40:13 -05:00
|
|
|
zupdatekey(key, password->passwd[i]);
|
2015-07-14 18:26:22 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/* zip decrypt byte */
|
|
|
|
static inline unsigned char zdecryptbyte(uint32_t key[3])
|
|
|
|
{
|
|
|
|
unsigned short temp;
|
|
|
|
temp = key[2] | 2;
|
|
|
|
return ((temp * (temp ^ 1)) >> 8);
|
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
/**
|
|
|
|
* @brief zip decrypt.
|
|
|
|
*
|
|
|
|
* TODO - search for strong encryption header (0x0017) and handle them
|
|
|
|
*
|
|
|
|
* @param src
|
2019-07-11 22:14:39 -04:00
|
|
|
* @param csize size of compressed data; includes the decryption header
|
|
|
|
* @param usize expected size of uncompressed data
|
2019-07-11 20:20:48 -04:00
|
|
|
* @param local_header
|
2019-07-11 22:14:39 -04:00
|
|
|
* @param[in,out] num_files_unzipped current number of files that have been unzipped
|
|
|
|
* @param[in,out] ctx scan context
|
|
|
|
* @param tmpd temp directory path name
|
|
|
|
* @param zcb callback function to invoke after extraction (default: scan)
|
|
|
|
* @return cl_error_t CL_EPARSE = could not apply a password
|
2019-07-11 20:20:48 -04:00
|
|
|
*/
|
|
|
|
static inline cl_error_t zdecrypt(
|
|
|
|
const uint8_t *src,
|
|
|
|
uint32_t csize,
|
|
|
|
uint32_t usize,
|
|
|
|
const uint8_t *local_header,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t *num_files_unzipped,
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_ctx *ctx,
|
|
|
|
char *tmpd,
|
2020-03-19 21:23:54 -04:00
|
|
|
zip_cb zcb,
|
|
|
|
const char *original_filename)
|
2015-07-14 18:26:22 -04:00
|
|
|
{
|
2019-05-04 15:54:54 -04:00
|
|
|
cl_error_t ret;
|
|
|
|
int v = 0;
|
|
|
|
uint32_t i;
|
2015-07-14 18:26:22 -04:00
|
|
|
uint32_t key[3];
|
2019-07-11 20:20:48 -04:00
|
|
|
uint8_t encryption_header[12]; /* encryption header buffer */
|
2015-07-20 15:00:18 -04:00
|
|
|
struct cli_pwdb *password, *pass_any, *pass_zip;
|
2015-07-14 18:26:22 -04:00
|
|
|
|
|
|
|
if (!ctx || !ctx->engine)
|
2018-12-03 12:40:13 -05:00
|
|
|
return CL_ENULLARG;
|
2015-07-14 18:26:22 -04:00
|
|
|
|
2015-07-16 12:35:38 -04:00
|
|
|
/* dconf */
|
|
|
|
if (ctx->dconf && !(ctx->dconf->archive & ARCH_CONF_PASSWD)) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: decrypt - skipping encrypted file\n");
|
|
|
|
return CL_SUCCESS;
|
2015-07-16 12:35:38 -04:00
|
|
|
}
|
2015-07-14 18:26:22 -04:00
|
|
|
|
2015-07-20 15:00:18 -04:00
|
|
|
pass_any = ctx->engine->pwdbs[CLI_PWDB_ANY];
|
|
|
|
pass_zip = ctx->engine->pwdbs[CLI_PWDB_ZIP];
|
2015-07-14 18:26:22 -04:00
|
|
|
|
2015-07-20 15:00:18 -04:00
|
|
|
while (pass_any || pass_zip) {
|
2018-12-03 12:40:13 -05:00
|
|
|
password = pass_zip ? pass_zip : pass_any;
|
|
|
|
|
|
|
|
zinitkey(key, password);
|
|
|
|
|
|
|
|
/* decrypting the encryption header */
|
2019-07-11 20:20:48 -04:00
|
|
|
memcpy(encryption_header, src, SIZEOF_ENCRYPTION_HEADER);
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
for (i = 0; i < SIZEOF_ENCRYPTION_HEADER; i++) {
|
|
|
|
encryption_header[i] ^= zdecryptbyte(key);
|
|
|
|
zupdatekey(key, encryption_header[i]);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/* verify that the password is correct */
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_version > 20) { /* higher than 2.0 */
|
|
|
|
uint16_t a = encryption_header[SIZEOF_ENCRYPTION_HEADER - 1];
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_flags & F_USEDD) {
|
|
|
|
cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x%02x 0x%x (moddate)\n", LOCAL_HEADER_version, a, LOCAL_HEADER_mtime);
|
|
|
|
if (a == ((LOCAL_HEADER_mtime >> 8) & 0xff))
|
2018-12-03 12:40:13 -05:00
|
|
|
v = 1;
|
|
|
|
} else {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x%02x 0x%x (crc32)\n", LOCAL_HEADER_version, a, LOCAL_HEADER_crc32);
|
|
|
|
if (a == ((LOCAL_HEADER_crc32 >> 24) & 0xff))
|
2018-12-03 12:40:13 -05:00
|
|
|
v = 1;
|
|
|
|
}
|
|
|
|
} else {
|
2019-07-11 20:20:48 -04:00
|
|
|
uint16_t a = encryption_header[SIZEOF_ENCRYPTION_HEADER - 1], b = encryption_header[SIZEOF_ENCRYPTION_HEADER - 2];
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_flags & F_USEDD) {
|
|
|
|
cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x0000%02x%02x 0x%x (moddate)\n", LOCAL_HEADER_version, a, b, LOCAL_HEADER_mtime);
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
if ((uint32_t)(b | (a << 8)) == (LOCAL_HEADER_mtime & 0xffff))
|
2018-12-03 12:40:13 -05:00
|
|
|
v = 1;
|
|
|
|
} else {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: decrypt - (v%u) >> 0x0000%02x%02x 0x%x (crc32)\n", LOCAL_HEADER_version, encryption_header[SIZEOF_ENCRYPTION_HEADER - 1], encryption_header[SIZEOF_ENCRYPTION_HEADER - 2], LOCAL_HEADER_crc32);
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
if ((uint32_t)(b | (a << 8)) == ((LOCAL_HEADER_crc32 >> 16) & 0xffff))
|
2018-12-03 12:40:13 -05:00
|
|
|
v = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (v) {
|
|
|
|
char name[1024], obuf[BUFSIZ];
|
2019-05-04 15:54:54 -04:00
|
|
|
char *tempfile = name;
|
|
|
|
size_t written = 0, total = 0;
|
2018-12-03 12:40:13 -05:00
|
|
|
fmap_t *dcypt_map;
|
|
|
|
const uint8_t *dcypt_zip;
|
2019-07-11 20:20:48 -04:00
|
|
|
int out_file;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
cli_dbgmsg("cli_unzip: decrypt - password [%s] matches\n", password->name);
|
|
|
|
|
|
|
|
/* output decrypted data to tempfile */
|
|
|
|
if (tmpd) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
snprintf(name, sizeof(name), "%s" PATHSEP "zip.decrypt.%03zu", tmpd, *num_files_unzipped);
|
2018-12-03 12:40:13 -05:00
|
|
|
name[sizeof(name) - 1] = '\0';
|
|
|
|
} else {
|
libclamav: Add engine option to toggle temp directory recursion
Temp directory recursion in ClamAV is when each layer of a scan gets its
own temp directory in the parent layer's temp directory.
In addition to temp directory recursion, ClamAV has been creating a new
subdirectory for each file scan as a risk-adverse method to ensure
no temporary file leaks fill up the disk.
Creating a directory is relatively slow on Windows in particular if
scanning a lot of very small files.
This commit:
1. Separates the temp directory recursion feature from the leave-temps
feature so that libclamav can leave temp files without making
subdirectories for each file scanned.
2. Makes it so that when temp directory recursion is off, libclamav
will just use the configure temp directory for all files.
The new option to enable temp directory recursion is for libclamav-only
at this time. It is off by default, and you can enable it like this:
```c
cl_engine_set_num(engine, CL_ENGINE_TMPDIR_RECURSION, 1);
```
For the `clamscan` and `clamd` programs, temp directory recursion will
be enabled when `--leave-temps` / `LeaveTemporaryFiles` is enabled.
The difference is that when disabled, it will return to using the
configured temp directory without making a subdirectory for each file
scanned, so as to improve scan performance for small files, mostly on
Windows.
Under the hood, this commit also:
1. Cleans up how we keep track of tmpdirs for each layer.
The goal here is to align how we keep track of layer-specific stuff
using the scan_layer structure.
2. Cleans up how we record metadata JSON for embedded files.
Note: Embedded files being different from Contained files, as they
are extracted not with a parser, but by finding them with
file type magic signatures.
CLAM-1583
2025-06-09 20:42:31 -04:00
|
|
|
if (!(tempfile = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "zip-decrypt"))) return CL_EMEM;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-07-11 20:20:48 -04:00
|
|
|
if ((out_file = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) == -1) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_warnmsg("cli_unzip: decrypt - failed to create temporary file %s\n", tempfile);
|
|
|
|
if (!tmpd) free(tempfile);
|
|
|
|
return CL_ETMPFILE;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 12; i < csize; i++) {
|
|
|
|
obuf[written] = src[i] ^ zdecryptbyte(key);
|
|
|
|
zupdatekey(key, obuf[written]);
|
|
|
|
|
|
|
|
written++;
|
|
|
|
if (written >= BUFSIZ) {
|
2019-07-11 20:20:48 -04:00
|
|
|
if (cli_writen(out_file, obuf, written) != written) {
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EWRITE;
|
|
|
|
goto zd_clean;
|
|
|
|
}
|
|
|
|
total += written;
|
|
|
|
written = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (written) {
|
2019-07-11 20:20:48 -04:00
|
|
|
if (cli_writen(out_file, obuf, written) != written) {
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EWRITE;
|
|
|
|
goto zd_clean;
|
|
|
|
}
|
|
|
|
total += written;
|
|
|
|
written = 0;
|
|
|
|
}
|
|
|
|
|
2019-05-04 15:54:54 -04:00
|
|
|
cli_dbgmsg("cli_unzip: decrypt - decrypted %zu bytes to %s\n", total, tempfile);
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
/* decrypt data to new fmap -> buffer */
|
2025-06-08 01:12:33 -04:00
|
|
|
if (!(dcypt_map = fmap_new(out_file, 0, total, NULL, tempfile))) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_warnmsg("cli_unzip: decrypt - failed to create fmap on decrypted file %s\n", tempfile);
|
|
|
|
ret = CL_EMAP;
|
|
|
|
goto zd_clean;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!(dcypt_zip = fmap_need_off_once(dcypt_map, 0, total))) {
|
|
|
|
cli_warnmsg("cli_unzip: decrypt - failed to acquire buffer on decrypted file %s\n", tempfile);
|
2025-06-08 01:12:33 -04:00
|
|
|
fmap_free(dcypt_map);
|
2018-12-03 12:40:13 -05:00
|
|
|
ret = CL_EREAD;
|
|
|
|
goto zd_clean;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* call unz on decrypted output */
|
2022-03-09 22:26:40 -08:00
|
|
|
ret = unz(dcypt_zip, csize - SIZEOF_ENCRYPTION_HEADER, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags,
|
|
|
|
num_files_unzipped, ctx, tmpd, zcb, original_filename, true);
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
/* clean-up and return */
|
2025-06-08 01:12:33 -04:00
|
|
|
fmap_free(dcypt_map);
|
2018-12-03 12:40:13 -05:00
|
|
|
zd_clean:
|
2019-07-11 20:20:48 -04:00
|
|
|
close(out_file);
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!ctx->engine->keeptmp)
|
2015-08-17 12:17:42 -04:00
|
|
|
if (cli_unlink(tempfile)) {
|
2018-12-03 12:40:13 -05:00
|
|
|
if (!tmpd) free(tempfile);
|
|
|
|
return CL_EUNLINK;
|
|
|
|
}
|
2015-07-14 18:26:22 -04:00
|
|
|
if (!tmpd) free(tempfile);
|
2018-12-03 12:40:13 -05:00
|
|
|
return ret;
|
|
|
|
}
|
2015-07-14 18:26:22 -04:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
if (pass_zip)
|
|
|
|
pass_zip = pass_zip->next;
|
|
|
|
else
|
|
|
|
pass_any = pass_any->next;
|
2015-07-14 18:26:22 -04:00
|
|
|
}
|
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
cli_dbgmsg("cli_unzip: decrypt failed - will attempt to unzip as if it were not encrypted\n");
|
|
|
|
|
|
|
|
ret = unz(src, csize, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags,
|
|
|
|
num_files_unzipped, ctx, tmpd, zcb, original_filename, false);
|
|
|
|
|
2015-07-14 18:26:22 -04:00
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
/**
|
|
|
|
* @brief Parse, extract, and scan a file using the local file header.
|
|
|
|
*
|
2019-08-30 14:48:57 -04:00
|
|
|
* Usage of the `record` parameter will alter behavior so it only collect file record metadata and does not extract or scan any files.
|
|
|
|
*
|
2025-08-14 21:17:46 -04:00
|
|
|
* @param[in,out] ctx scan context
|
2019-07-11 22:14:39 -04:00
|
|
|
* @param loff offset of the local file header
|
|
|
|
* @param[in,out] num_files_unzipped current number of files that have been unzipped
|
|
|
|
* @param file_count current number of files that have been discovered
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
* @param central_header pointer to central directory header
|
2019-07-11 22:14:39 -04:00
|
|
|
* @param tmpd temp directory path name
|
|
|
|
* @param detect_encrypted bool: if encrypted files should raise heuristic alert
|
|
|
|
* @param zcb callback function to invoke after extraction (default: scan)
|
2019-08-30 14:48:57 -04:00
|
|
|
* @param record (optional) a pointer to a struct to store file record information.
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
* @param[out] file_record_size (optional) if not NULL, will be set to the size of the file header + file data.
|
|
|
|
* @return cl_error_t CL_SUCCESS on success, or an error code on failure.
|
2019-07-11 20:20:48 -04:00
|
|
|
*/
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
static cl_error_t parse_local_file_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
cli_ctx *ctx,
|
2019-07-11 20:20:48 -04:00
|
|
|
uint32_t loff,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t *num_files_unzipped,
|
|
|
|
size_t file_count,
|
|
|
|
const uint8_t *central_header,
|
2019-07-11 20:20:48 -04:00
|
|
|
char *tmpd,
|
|
|
|
int detect_encrypted,
|
2019-07-11 22:14:39 -04:00
|
|
|
zip_cb zcb,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
struct zip_record *record,
|
|
|
|
size_t *file_record_size)
|
2018-12-03 12:40:13 -05:00
|
|
|
{
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
2025-08-14 21:17:46 -04:00
|
|
|
const uint8_t *local_header = NULL;
|
|
|
|
char name[256] = {0};
|
|
|
|
char *original_filename = NULL;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
uint32_t csize = 0, usize = 0;
|
|
|
|
|
|
|
|
uint32_t name_size = 0;
|
|
|
|
const char *src = NULL;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
const uint8_t *zip = NULL;
|
|
|
|
size_t bytes_remaining = 0;
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (NULL != file_record_size) {
|
|
|
|
*file_record_size = 0;
|
|
|
|
}
|
2023-08-23 17:41:40 -07:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
local_header = fmap_need_off(ctx->fmap, loff, SIZEOF_LOCAL_HEADER);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (NULL == local_header) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - out of file or work complete\n");
|
|
|
|
status = CL_EPARSE;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_magic != ZIP_MAGIC_LOCAL_FILE_HEADER) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: local header - bad magic\n");
|
|
|
|
status = CL_EFORMAT;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2025-08-14 21:17:46 -04:00
|
|
|
bytes_remaining = ctx->fmap->len - loff;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
zip = local_header + SIZEOF_LOCAL_HEADER;
|
2025-08-14 21:17:46 -04:00
|
|
|
bytes_remaining -= SIZEOF_LOCAL_HEADER;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (bytes_remaining <= LOCAL_HEADER_flen) {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: local header - fname out of file\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2023-08-23 17:41:40 -07:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
name_size = LOCAL_HEADER_flen >= (sizeof(name) - 1) ? sizeof(name) - 1 : LOCAL_HEADER_flen;
|
|
|
|
cli_dbgmsg("cli_unzip: name_size %u\n", name_size);
|
2025-08-14 21:17:46 -04:00
|
|
|
src = fmap_need_ptr_once(ctx->fmap, zip, name_size);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (name_size && (NULL != src)) {
|
|
|
|
memcpy(name, zip, name_size);
|
2025-08-11 18:02:09 -04:00
|
|
|
if (CL_SUCCESS != cli_basename(name, name_size, &original_filename, true /* posix_support_backslash_pathsep */)) {
|
2023-08-23 17:41:40 -07:00
|
|
|
original_filename = NULL;
|
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2023-08-23 17:41:40 -07:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
zip += LOCAL_HEADER_flen;
|
2025-08-14 21:17:46 -04:00
|
|
|
bytes_remaining -= LOCAL_HEADER_flen;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
/* Print ZMD container metadata signature and try matching the metadata AFTER we have all the metadata. */
|
|
|
|
cli_dbgmsg("cli_unzip: local header - ZMDNAME:%d:%s:%u:%u:%x:%u:%zu:%u\n",
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
((LOCAL_HEADER_flags & F_ENCR) != 0), name, LOCAL_HEADER_usize, LOCAL_HEADER_csize, LOCAL_HEADER_crc32, LOCAL_HEADER_method, file_count, ctx->recursion_level);
|
2018-12-03 12:40:13 -05:00
|
|
|
/* ZMDfmt virname:encrypted(0-1):filename(exact|*):usize(exact|*):csize(exact|*):crc32(exact|*):method(exact|*):fileno(exact|*):maxdepth(exact|*) */
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
/* Scan file header metadata. */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = cli_matchmeta(ctx, name, LOCAL_HEADER_csize, LOCAL_HEADER_usize, (LOCAL_HEADER_flags & F_ENCR) != 0, file_count, LOCAL_HEADER_crc32);
|
|
|
|
if (ret != CL_SUCCESS) {
|
|
|
|
status = ret;
|
2022-08-27 10:30:51 -07:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_flags & F_MSKED) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - header has got unusable masked data\n");
|
2018-12-03 12:40:13 -05:00
|
|
|
/* FIXME: need to find/craft a sample */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
if (detect_encrypted && (LOCAL_HEADER_flags & F_ENCR) && SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: Encrypted files found in archive.\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = cli_append_potentially_unwanted(ctx, "Heuristics.Encrypted.Zip");
|
|
|
|
if (ret != CL_SUCCESS) {
|
|
|
|
status = ret;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_flags & F_USEDD) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - has data desc\n");
|
|
|
|
if (!central_header) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
usize = CENTRAL_HEADER_usize;
|
|
|
|
csize = CENTRAL_HEADER_csize;
|
2018-12-03 12:40:13 -05:00
|
|
|
} else {
|
2019-07-11 20:20:48 -04:00
|
|
|
usize = LOCAL_HEADER_usize;
|
|
|
|
csize = LOCAL_HEADER_csize;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (bytes_remaining <= LOCAL_HEADER_elen) {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: local header - extra out of file\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2016-06-08 16:25:34 -04:00
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
zip += LOCAL_HEADER_elen;
|
2025-08-14 21:17:46 -04:00
|
|
|
bytes_remaining -= LOCAL_HEADER_elen;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2025-10-12 19:12:06 -04:00
|
|
|
if (bytes_remaining < csize) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - stream out of file\n");
|
|
|
|
status = CL_EPARSE;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-07-11 22:14:39 -04:00
|
|
|
|
2025-10-12 19:12:06 -04:00
|
|
|
if (NULL != record) {
|
2019-08-30 14:48:57 -04:00
|
|
|
/* Don't actually unzip if we're just collecting the file record information (offset, sizes) */
|
2025-10-12 19:12:06 -04:00
|
|
|
if (NULL == original_filename) {
|
|
|
|
record->original_filename = NULL;
|
|
|
|
} else {
|
|
|
|
record->original_filename = CLI_STRNDUP(original_filename, strlen(original_filename));
|
|
|
|
}
|
|
|
|
record->local_header_offset = loff;
|
|
|
|
record->local_header_size = zip - local_header;
|
|
|
|
record->compressed_size = csize;
|
|
|
|
record->uncompressed_size = usize;
|
|
|
|
record->method = LOCAL_HEADER_method;
|
|
|
|
record->flags = LOCAL_HEADER_flags;
|
|
|
|
record->encrypted = (LOCAL_HEADER_flags & F_ENCR) ? 1 : 0;
|
|
|
|
|
|
|
|
status = CL_SUCCESS;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Unzip or decompress & then unzip.
|
|
|
|
*/
|
|
|
|
if (!csize) { /* FIXME: what's used for method0 files? csize or usize? Nothing in the specs, needs testing */
|
|
|
|
cli_dbgmsg("cli_unzip: local header - skipping empty file\n");
|
|
|
|
} else {
|
2025-08-14 21:17:46 -04:00
|
|
|
zip = fmap_need_ptr_once(ctx->fmap, zip, csize);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (NULL == zip) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - data out of file\n");
|
|
|
|
status = CL_EPARSE;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
if (LOCAL_HEADER_flags & F_ENCR) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = zdecrypt(zip, csize, usize, local_header, num_files_unzipped, ctx, tmpd, zcb, original_filename);
|
|
|
|
if (ret != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - zdecrypt failed with %d\n", ret);
|
|
|
|
status = ret;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
} else {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = unz(zip, csize, usize, LOCAL_HEADER_method, LOCAL_HEADER_flags, num_files_unzipped,
|
|
|
|
ctx, tmpd, zcb, original_filename, false);
|
|
|
|
if (ret != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - unz failed with %d\n", ret);
|
|
|
|
status = ret;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
}
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2025-10-12 19:12:06 -04:00
|
|
|
zip += csize;
|
|
|
|
bytes_remaining -= csize;
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
if (LOCAL_HEADER_flags & F_USEDD) {
|
2025-08-14 21:17:46 -04:00
|
|
|
if (bytes_remaining < 12) {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: local header - data desc out of file\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
2020-04-14 11:52:12 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2025-08-14 21:17:46 -04:00
|
|
|
bytes_remaining -= 12;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the next 4 bytes to check if ZIP is split or spanned.
|
|
|
|
*
|
|
|
|
* 8.5.3 Spanned/Split archives created using PKZIP for Windows
|
|
|
|
* (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
|
|
|
|
* or PKZIP Explorer will include a special spanning
|
|
|
|
* signature as the first 4 bytes of the first segment of
|
|
|
|
* the archive. This signature (0x08074b50) will be
|
|
|
|
* followed immediately by the local header signature for
|
|
|
|
* the first file in the archive.
|
|
|
|
*/
|
2025-08-14 21:17:46 -04:00
|
|
|
zip = fmap_need_ptr_once(ctx->fmap, zip, 4);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (NULL == zip) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - data desc out of file\n");
|
|
|
|
status = CL_EPARSE;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cli_readint32(zip) == ZIP_MAGIC_FILE_BEGIN_SPLIT_OR_SPANNED) {
|
|
|
|
cli_dbgmsg("cli_unzip: local header - split/spanned archive detected\n");
|
|
|
|
/* skip the split/spanned signature */
|
|
|
|
zip += 4;
|
2025-08-14 21:17:46 -04:00
|
|
|
bytes_remaining -= 4;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
zip += 12;
|
|
|
|
}
|
2020-04-14 11:52:12 -04:00
|
|
|
|
|
|
|
/* Success */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (file_record_size) {
|
|
|
|
*file_record_size = zip - local_header;
|
|
|
|
}
|
|
|
|
status = CL_SUCCESS;
|
2020-04-14 11:52:12 -04:00
|
|
|
|
|
|
|
done:
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (NULL != local_header) {
|
2025-08-14 21:17:46 -04:00
|
|
|
fmap_unneed_off(ctx->fmap, loff, SIZEOF_LOCAL_HEADER);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
}
|
|
|
|
|
2020-04-14 11:52:12 -04:00
|
|
|
if (NULL != original_filename) {
|
|
|
|
free(original_filename);
|
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
return status;
|
2007-12-13 19:47:07 +00:00
|
|
|
}
|
|
|
|
|
Reduce unnecessary scanning of embedded file FPs (#1571)
When embedded file type recognition finds a possible embedded file, it
is being scanned as a new embedded file even if it turns out it was a
false positive and parsing fails. My solution is to pre-parse the file
headers as little possible to determine if it is valid. If possible,
also determine the file size based on the headers. That will make it so
we don't have to scan additional data when the embedded file is not at
the very end.
This commit adds header checks prior to embedded ZIP, ARJ, and CAB
scanning. For these types I was also able to use the header checks to
determine the object size so as to prevent excessive pattern matching.
TODO: Add the same for RAR, EGG, 7Z, NULSFT, AUTOIT, IShield, and PDF.
This commit also removes duplicate matching for embedded MSEXE.
The embedded MSEXE detection and scanning logic was accidentally
creating an extra duplicate layer in between scanning and detection
because of the logic within the `cli_scanembpe()` function.
That function was effectively doing the header check which this commit
adds for ZIP, ARJ, and CAB but minus the size check.
Note: It is unfortunately not possible to get an accurage size from PE
file headers.
The `cli_scanembpe()` function also used to dump to a temp file for no
reason since FMAPs were extended to support windows into other FMAPs.
So this commit removes the intermediate layer as well as dropping a temp
file for each embedded PE file.
Further, this commit adds configuration and DCONF safeguards around all
embedded file type scanning.
Finally, this commit adds a set of tests to validate proper extraction
of embedded ZIP, ARJ, CAB, and MSEXE files.
CLAM-2862
Co-authored-by: TheRaynMan <draynor@sourcefire.com>
2025-09-23 15:57:28 -04:00
|
|
|
cl_error_t cli_unzip_single_header_check(
|
|
|
|
cli_ctx *ctx,
|
|
|
|
uint32_t offset,
|
|
|
|
size_t *size)
|
|
|
|
{
|
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
struct zip_record file_record = {0};
|
|
|
|
cl_error_t ret;
|
|
|
|
|
|
|
|
ret = parse_local_file_header(
|
|
|
|
ctx,
|
|
|
|
offset,
|
|
|
|
NULL, /* num_files_unzipped */
|
|
|
|
0, /* file_count */
|
|
|
|
NULL, /* central_header */
|
|
|
|
NULL, /* tmpd */
|
|
|
|
false, /* detect_encrypted */
|
|
|
|
NULL, /* zcb */
|
|
|
|
&file_record,
|
|
|
|
size);
|
|
|
|
if (ret != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: single header check - failed to parse local file header: %s (%d)\n", cl_strerror(ret), ret);
|
|
|
|
status = ret;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (file_record.compressed_size == 0 || file_record.uncompressed_size == 0) {
|
|
|
|
cli_dbgmsg("cli_unzip: single header check - empty file\n");
|
|
|
|
status = CL_EFORMAT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
status = CL_SUCCESS;
|
|
|
|
|
|
|
|
done:
|
|
|
|
if (file_record.original_filename) {
|
|
|
|
free(file_record.original_filename);
|
|
|
|
}
|
|
|
|
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
/**
|
|
|
|
* @brief Parse, extract, and scan a file by iterating the central directory.
|
|
|
|
*
|
2019-08-30 14:48:57 -04:00
|
|
|
* Usage of the `record` parameter will alter behavior so it only collect file record metadata and does not extract or scan any files.
|
|
|
|
*
|
2025-08-14 21:17:46 -04:00
|
|
|
* @param[in,out] ctx scan context
|
|
|
|
* @param central_file_header_offset offset of the file header in the central directory
|
2019-07-11 22:14:39 -04:00
|
|
|
* @param[in,out] num_files_unzipped current number of files that have been unzipped
|
|
|
|
* @param file_count current number of files that have been discovered
|
|
|
|
* @param tmpd temp directory path name
|
|
|
|
* @param requests (optional) structure use to search the zip for files by name
|
2019-08-30 14:48:57 -04:00
|
|
|
* @param record (optional) a pointer to a struct to store file record information.
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
* @param[out] file_record_size A pointer to a variable to store the size of the file record.
|
|
|
|
* @return cl_error_t CL_SUCCESS on success, or an error code on failure.
|
2019-07-11 20:20:48 -04:00
|
|
|
*/
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
static cl_error_t parse_central_directory_file_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
cli_ctx *ctx,
|
|
|
|
size_t central_file_header_offset,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t *num_files_unzipped,
|
|
|
|
size_t file_count,
|
2019-07-11 20:20:48 -04:00
|
|
|
char *tmpd,
|
2019-07-11 22:14:39 -04:00
|
|
|
struct zip_requests *requests,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
struct zip_record *record,
|
|
|
|
size_t *file_record_size)
|
2018-12-03 12:40:13 -05:00
|
|
|
{
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
|
|
|
|
|
|
|
char name[256] = {0};
|
|
|
|
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
const uint8_t *central_header = NULL;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t index;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
*file_record_size = 0;
|
2021-10-21 13:18:29 -07:00
|
|
|
|
2022-10-03 18:23:05 -07:00
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: central header - Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_ETIMEOUT;
|
2022-10-03 18:23:05 -07:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
central_header = fmap_need_off(ctx->fmap, central_file_header_offset, SIZEOF_CENTRAL_HEADER);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (NULL == central_header) {
|
2025-10-12 19:12:06 -04:00
|
|
|
cli_dbgmsg("cli_unzip: central header - reached end of central directory.\n");
|
|
|
|
status = CL_BREAK;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CENTRAL_HEADER_magic != ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_BEGIN) {
|
|
|
|
cli_dbgmsg("cli_unzip: central header - file header offset has wrong magic\n");
|
|
|
|
status = CL_EPARSE;
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
index = central_file_header_offset + SIZEOF_CENTRAL_HEADER;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: central header - flags %x - method %x - csize %x - usize %x - flen %x - elen %x - clen %x - disk %x - off %x\n",
|
|
|
|
CENTRAL_HEADER_flags, CENTRAL_HEADER_method, CENTRAL_HEADER_csize, CENTRAL_HEADER_usize, CENTRAL_HEADER_flen, CENTRAL_HEADER_extra_len, CENTRAL_HEADER_comment_len, CENTRAL_HEADER_disk_num, CENTRAL_HEADER_off);
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (ctx->fmap->len <= index + CENTRAL_HEADER_flen) {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: central header - fname out of file\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t size = (CENTRAL_HEADER_flen >= sizeof(name)) ? sizeof(name) - 1 : CENTRAL_HEADER_flen;
|
2025-08-14 21:17:46 -04:00
|
|
|
const char *src = fmap_need_off_once(ctx->fmap, index, size);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (src) {
|
|
|
|
memcpy(name, src, size);
|
|
|
|
name[size] = '\0';
|
|
|
|
cli_dbgmsg("cli_unzip: central header - fname: %s\n", name);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
index += CENTRAL_HEADER_flen;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
/* requests do not supply a ctx; also prevent multiple scans */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = cli_matchmeta(ctx, name, CENTRAL_HEADER_csize, CENTRAL_HEADER_usize, (CENTRAL_HEADER_flags & F_ENCR) != 0, file_count, CENTRAL_HEADER_crc32);
|
|
|
|
if (CL_VIRUS == ret) {
|
|
|
|
// Set file record size to 0 to indicate this is the last file record
|
|
|
|
status = CL_VIRUS;
|
2022-08-27 10:30:51 -07:00
|
|
|
goto done;
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
}
|
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (ctx->fmap->len <= index + CENTRAL_HEADER_extra_len) {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: central header - extra out of file\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
index += CENTRAL_HEADER_extra_len;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (ctx->fmap->len < index + CENTRAL_HEADER_comment_len) {
|
2019-07-11 20:20:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: central header - comment out of file\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
index += CENTRAL_HEADER_comment_len;
|
|
|
|
|
|
|
|
*file_record_size = index - central_file_header_offset;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
if (!requests) {
|
2025-08-14 21:17:46 -04:00
|
|
|
// Parse the local file header.
|
|
|
|
// We'll verify enough bytes available for a local file header when we parse it.
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
status = parse_local_file_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
ctx,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
CENTRAL_HEADER_off,
|
|
|
|
num_files_unzipped,
|
|
|
|
file_count,
|
|
|
|
central_header,
|
|
|
|
tmpd,
|
|
|
|
1, /* detect_encrypted */
|
|
|
|
zip_scan_cb,
|
|
|
|
record,
|
|
|
|
NULL); /* file_record_size */
|
2018-12-03 12:40:13 -05:00
|
|
|
} else {
|
|
|
|
int i;
|
|
|
|
size_t len;
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
for (i = 0; i < requests->namecnt; ++i) {
|
|
|
|
cli_dbgmsg("cli_unzip: central header - checking for %i: %s\n", i, requests->names[i]);
|
2018-12-03 12:40:13 -05:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
len = MIN(sizeof(name) - 1, requests->namelens[i]);
|
|
|
|
if (!strncmp(requests->names[i], name, len)) {
|
|
|
|
requests->match = 1;
|
|
|
|
requests->found = i;
|
|
|
|
requests->loff = CENTRAL_HEADER_off;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
status = CL_SUCCESS;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
done:
|
|
|
|
if (NULL != central_header) {
|
2025-08-14 21:17:46 -04:00
|
|
|
fmap_unneed_ptr(ctx->fmap, central_header, SIZEOF_CENTRAL_HEADER);
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
return status;
|
2007-12-13 19:47:07 +00:00
|
|
|
}
|
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
/**
|
|
|
|
* @brief Sort zip_record structures based on local file offset.
|
|
|
|
*
|
|
|
|
* @param first
|
|
|
|
* @param second
|
|
|
|
* @return int 1 if first record's offset is higher than second's.
|
2021-08-10 08:41:17 +10:00
|
|
|
* @return int 0 if first and second record offsets are equal.
|
2019-08-30 14:48:57 -04:00
|
|
|
* @return int -1 if first record's offset is less than second's.
|
|
|
|
*/
|
|
|
|
static int sort_by_file_offset(const void *first, const void *second)
|
|
|
|
{
|
|
|
|
const struct zip_record *a = (const struct zip_record *)first;
|
|
|
|
const struct zip_record *b = (const struct zip_record *)second;
|
|
|
|
|
|
|
|
/* Avoid return x - y, which can cause undefined behaviour
|
|
|
|
because of signed integer overflow. */
|
2019-09-08 23:17:43 -04:00
|
|
|
if (a->local_header_offset < b->local_header_offset)
|
2019-08-30 14:48:57 -04:00
|
|
|
return -1;
|
2019-09-08 23:17:43 -04:00
|
|
|
else if (a->local_header_offset > b->local_header_offset)
|
2019-08-30 14:48:57 -04:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2019-09-08 23:17:43 -04:00
|
|
|
* @brief Create a catalogue of the central directory.
|
2019-08-30 14:48:57 -04:00
|
|
|
*
|
2019-09-08 23:17:43 -04:00
|
|
|
* This function indexes every file in the central directory.
|
|
|
|
* It creates a zip record catalogue and sorts them by file entry offset.
|
2019-08-30 14:48:57 -04:00
|
|
|
* Then it iterates the sorted file records looking for overlapping files.
|
|
|
|
*
|
2019-09-08 23:17:43 -04:00
|
|
|
* The caller is responsible for freeing the catalogue.
|
|
|
|
* The catalogue may contain duplicate items, which should be skipped.
|
|
|
|
*
|
|
|
|
* @param ctx The scanning context
|
|
|
|
* @param coff The central directory offset
|
|
|
|
* @param[out] catalogue A catalogue of zip_records found in the central directory.
|
|
|
|
* @param[out] num_records The number of records in the catalogue.
|
2025-03-22 22:41:29 -04:00
|
|
|
* @return cl_error_t CL_SUCCESS if no overlapping files
|
2019-08-30 14:48:57 -04:00
|
|
|
* @return cl_error_t CL_VIRUS if overlapping files and heuristic alerts are enabled
|
|
|
|
* @return cl_error_t CL_EFORMAT if overlapping files and heuristic alerts are disabled
|
|
|
|
* @return cl_error_t CL_ETIMEOUT if the scan time limit is exceeded.
|
|
|
|
* @return cl_error_t CL_EMEM for memory allocation errors.
|
|
|
|
*/
|
2019-09-08 23:17:43 -04:00
|
|
|
cl_error_t index_the_central_directory(
|
|
|
|
cli_ctx *ctx,
|
|
|
|
uint32_t coff,
|
|
|
|
struct zip_record **catalogue,
|
|
|
|
size_t *num_records)
|
2019-08-30 14:48:57 -04:00
|
|
|
{
|
2025-03-22 22:41:29 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
size_t num_record_blocks = 0;
|
|
|
|
size_t index = 0;
|
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
struct zip_record *zip_catalogue = NULL;
|
|
|
|
size_t records_count = 0;
|
2019-09-08 23:17:43 -04:00
|
|
|
struct zip_record *curr_record = NULL;
|
|
|
|
struct zip_record *prev_record = NULL;
|
|
|
|
uint32_t num_overlapping_files = 0;
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
bool exceeded_max_files = false;
|
2019-08-30 14:48:57 -04:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t record_size = 0;
|
|
|
|
size_t record_offset = coff;
|
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
if (NULL == catalogue || NULL == num_records) {
|
|
|
|
cli_errmsg("index_the_central_directory: Invalid NULL arguments\n");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
*catalogue = NULL;
|
|
|
|
*num_records = 0;
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
CLI_CALLOC_OR_GOTO_DONE(
|
|
|
|
zip_catalogue,
|
|
|
|
1,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE,
|
|
|
|
status = CL_EMEM);
|
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
num_record_blocks = 1;
|
|
|
|
|
|
|
|
cli_dbgmsg("cli_unzip: checking for non-recursive zip bombs...\n");
|
|
|
|
|
2021-10-21 13:18:29 -07:00
|
|
|
do {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = parse_central_directory_file_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
ctx,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
record_offset,
|
|
|
|
NULL, // num_files_unzipped not required
|
|
|
|
records_count + 1,
|
|
|
|
NULL, // tmpd not required
|
|
|
|
NULL,
|
|
|
|
&(zip_catalogue[records_count]),
|
|
|
|
&record_size);
|
|
|
|
|
|
|
|
if (ret == CL_VIRUS) {
|
|
|
|
// Aborting scan due to a detection (not in all match mode).
|
|
|
|
status = CL_VIRUS;
|
|
|
|
goto done;
|
2021-10-21 13:18:29 -07:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (record_size == 0) {
|
2021-10-21 13:18:29 -07:00
|
|
|
// No more files (previous was last).
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
// Found a record.
|
|
|
|
records_count++;
|
2020-06-11 16:42:08 +02:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
// Increment the record offset by the size of the record for the next iteration.
|
|
|
|
record_offset += record_size;
|
2019-08-30 14:48:57 -04:00
|
|
|
|
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* stop checking file entries if we'll exceed maxfiles */
|
|
|
|
if (ctx->engine->maxfiles && records_count >= ctx->engine->maxfiles) {
|
|
|
|
cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
|
2022-08-19 16:21:42 -07:00
|
|
|
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
exceeded_max_files = true; // Set a bool so we can return the correct status code later.
|
|
|
|
// We still need to scan the files we found while reviewing the file records up to this limit.
|
2019-08-30 14:48:57 -04:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (num_record_blocks * ZIP_RECORDS_CHECK_BLOCKSIZE == records_count + 1) {
|
|
|
|
cli_dbgmsg("cli_unzip: Filled a block of zip records. Allocating an additional block for more zip records...\n");
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
CLI_MAX_REALLOC_OR_GOTO_DONE(
|
|
|
|
zip_catalogue,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1),
|
|
|
|
status = CL_EMEM);
|
2021-10-21 13:18:29 -07:00
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
num_record_blocks++;
|
|
|
|
/* zero out the memory for the new records */
|
2025-03-22 22:41:29 -04:00
|
|
|
memset(&(zip_catalogue[records_count]), 0,
|
2025-03-25 15:37:12 -04:00
|
|
|
sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - records_count));
|
2019-08-30 14:48:57 -04:00
|
|
|
}
|
2021-10-21 13:18:29 -07:00
|
|
|
} while (1);
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
if (records_count > 1) {
|
|
|
|
/*
|
|
|
|
* Sort the records by local file offset
|
|
|
|
*/
|
|
|
|
cli_qsort(zip_catalogue, records_count, sizeof(struct zip_record), sort_by_file_offset);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Detect overlapping files.
|
|
|
|
*/
|
|
|
|
for (index = 1; index < records_count; index++) {
|
|
|
|
prev_record = &(zip_catalogue[index - 1]);
|
|
|
|
curr_record = &(zip_catalogue[index]);
|
|
|
|
|
2022-07-18 11:38:00 -07:00
|
|
|
uint32_t prev_record_size = prev_record->local_header_size + prev_record->compressed_size;
|
|
|
|
uint32_t curr_record_size = curr_record->local_header_size + curr_record->compressed_size;
|
|
|
|
uint32_t prev_record_end;
|
|
|
|
uint32_t curr_record_end;
|
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
/* Check for integer overflow in 32bit size & offset values */
|
2022-07-18 11:38:00 -07:00
|
|
|
if ((UINT32_MAX - prev_record_size < prev_record->local_header_offset) ||
|
|
|
|
(UINT32_MAX - curr_record_size < curr_record->local_header_offset)) {
|
2019-09-08 23:17:43 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Integer overflow detected; invalid data sizes in zip file headers.\n");
|
|
|
|
status = CL_EFORMAT;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2022-07-18 11:38:00 -07:00
|
|
|
prev_record_end = prev_record->local_header_offset + prev_record_size;
|
|
|
|
curr_record_end = curr_record->local_header_offset + curr_record_size;
|
|
|
|
|
|
|
|
if (((curr_record->local_header_offset >= prev_record->local_header_offset) && (curr_record->local_header_offset + ZIP_RECORD_OVERLAP_FUDGE_FACTOR < prev_record_end)) ||
|
|
|
|
((prev_record->local_header_offset >= curr_record->local_header_offset) && (prev_record->local_header_offset + ZIP_RECORD_OVERLAP_FUDGE_FACTOR < curr_record_end))) {
|
2019-09-08 23:17:43 -04:00
|
|
|
/* Overlapping file detected */
|
|
|
|
num_overlapping_files++;
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
if ((curr_record->local_header_offset == prev_record->local_header_offset) &&
|
|
|
|
(curr_record->local_header_size == prev_record->local_header_size) &&
|
|
|
|
(curr_record->compressed_size == prev_record->compressed_size)) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Ignoring duplicate file entry at offset: 0x%x.\n", curr_record->local_header_offset);
|
2019-08-30 14:48:57 -04:00
|
|
|
} else {
|
2019-09-08 23:17:43 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Overlapping files detected.\n");
|
2022-07-18 11:38:00 -07:00
|
|
|
cli_dbgmsg(" previous file end: %u\n", prev_record_end);
|
2019-09-08 23:17:43 -04:00
|
|
|
cli_dbgmsg(" current file start: %u\n", curr_record->local_header_offset);
|
|
|
|
|
|
|
|
if (ZIP_MAX_NUM_OVERLAPPING_FILES < num_overlapping_files) {
|
2025-09-15 18:01:03 -04:00
|
|
|
status = CL_EFORMAT;
|
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
if (SCAN_HEURISTICS) {
|
2025-09-15 18:01:03 -04:00
|
|
|
ret = cli_append_potentially_unwanted(ctx, "Heuristics.Zip.OverlappingFiles");
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
|
|
status = ret;
|
|
|
|
}
|
2019-09-08 23:17:43 -04:00
|
|
|
}
|
2025-09-15 18:01:03 -04:00
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
goto done;
|
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
*catalogue = zip_catalogue;
|
|
|
|
*num_records = records_count;
|
|
|
|
status = CL_SUCCESS;
|
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
done:
|
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
if (CL_SUCCESS != status) {
|
|
|
|
if (NULL != zip_catalogue) {
|
2021-10-21 13:18:29 -07:00
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < records_count; i++) {
|
|
|
|
if (NULL != zip_catalogue[i].original_filename) {
|
|
|
|
free(zip_catalogue[i].original_filename);
|
|
|
|
zip_catalogue[i].original_filename = NULL;
|
|
|
|
}
|
|
|
|
}
|
2019-09-08 23:17:43 -04:00
|
|
|
free(zip_catalogue);
|
|
|
|
zip_catalogue = NULL;
|
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2022-08-27 10:30:51 -07:00
|
|
|
if (exceeded_max_files) {
|
|
|
|
status = CL_EMAXFILES;
|
|
|
|
}
|
|
|
|
}
|
2020-06-11 16:42:08 +02:00
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2025-02-26 21:12:58 -05:00
|
|
|
/**
|
2025-03-22 22:41:29 -04:00
|
|
|
* @brief Index local file headers between two file offsets
|
2025-02-26 21:12:58 -05:00
|
|
|
*
|
|
|
|
* This function indexes every file within certain file offsets in a zip file.
|
|
|
|
* It places the indexed local file headers into a catalogue. If there are
|
2025-03-22 22:41:29 -04:00
|
|
|
* already elements in the catalogue, it appends the found files to the
|
2025-02-26 21:12:58 -05:00
|
|
|
* catalogue.
|
|
|
|
*
|
|
|
|
* The caller is responsible for freeing the catalogue.
|
|
|
|
* The catalogue may contain duplicate items, which should be skipped.
|
|
|
|
*
|
|
|
|
* @param ctx The scanning context
|
|
|
|
* @param map The file map
|
|
|
|
* @param fsize The file size
|
|
|
|
* @param start_offset The start file offset
|
|
|
|
* @param end_offset The end file offset
|
|
|
|
* @param file_count The number of files extracted from the zip file thus far
|
|
|
|
* @param[out] temp_catalogue A catalogue of zip_records. Found files between the two offset bounds will be appended to this list.
|
|
|
|
* @param[out] num_records The number of records in the catalogue.
|
2025-03-22 22:41:29 -04:00
|
|
|
* @return cl_error_t CL_SUCCESS if no overlapping files
|
2025-02-26 21:12:58 -05:00
|
|
|
* @return cl_error_t CL_VIRUS if overlapping files and heuristic alerts are enabled
|
|
|
|
* @return cl_error_t CL_EFORMAT if overlapping files and heuristic alerts are disabled
|
|
|
|
* @return cl_error_t CL_ETIMEOUT if the scan time limit is exceeded.
|
|
|
|
* @return cl_error_t CL_EMEM for memory allocation errors.
|
|
|
|
*/
|
|
|
|
cl_error_t index_local_file_headers_within_bounds(
|
|
|
|
cli_ctx *ctx,
|
|
|
|
fmap_t *map,
|
|
|
|
uint32_t fsize,
|
|
|
|
uint32_t start_offset,
|
|
|
|
uint32_t end_offset,
|
|
|
|
uint32_t file_count,
|
|
|
|
struct zip_record **temp_catalogue,
|
|
|
|
size_t *num_records)
|
|
|
|
{
|
2025-03-22 22:41:29 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
size_t num_record_blocks = 0;
|
|
|
|
size_t index = 0;
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
uint32_t search_offset = 0;
|
2025-03-22 22:41:29 -04:00
|
|
|
uint32_t total_file_count = file_count;
|
2025-02-26 21:12:58 -05:00
|
|
|
struct zip_record *zip_catalogue = NULL;
|
|
|
|
bool exceeded_max_files = false;
|
|
|
|
|
|
|
|
if (NULL == temp_catalogue || NULL == num_records) {
|
|
|
|
cli_errmsg("index_local_file_headers_within_bounds: Invalid NULL arguments\n");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
zip_catalogue = *temp_catalogue;
|
2025-03-22 22:41:29 -04:00
|
|
|
|
2025-02-26 21:12:58 -05:00
|
|
|
/*
|
|
|
|
* Allocate zip_record if it is empty. If not empty, we will append file headers to the list
|
|
|
|
*/
|
|
|
|
if (NULL == zip_catalogue) {
|
2025-03-22 22:41:29 -04:00
|
|
|
CLI_CALLOC_OR_GOTO_DONE(
|
|
|
|
zip_catalogue,
|
|
|
|
1,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE,
|
|
|
|
status = CL_EMEM);
|
2025-02-26 21:12:58 -05:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
*num_records = 0;
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
num_record_blocks = (*num_records / ZIP_RECORDS_CHECK_BLOCKSIZE) + 1;
|
|
|
|
index = *num_records;
|
|
|
|
|
|
|
|
if (start_offset > fsize || end_offset > fsize || start_offset > end_offset) {
|
2025-10-12 19:12:06 -04:00
|
|
|
cli_errmsg("index_local_file_headers_within_bounds: Invalid offset arguments: start_offset=%u, end_offset=%u, fsize=%u\n",
|
|
|
|
start_offset, end_offset, fsize);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EPARSE;
|
2025-02-26 21:12:58 -05:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search for local file headers between the start and end offsets. Append found file headers to zip_catalogue
|
|
|
|
*/
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
for (search_offset = start_offset; search_offset < end_offset; search_offset++) {
|
|
|
|
const char *local_file_header = fmap_need_off_once(map, search_offset, SIZEOF_LOCAL_HEADER);
|
|
|
|
if (NULL == local_file_header) {
|
|
|
|
break; // Reached the end of the file.
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cli_readint32(local_file_header) == ZIP_MAGIC_LOCAL_FILE_HEADER) {
|
|
|
|
uint32_t local_file_header_offset = search_offset;
|
|
|
|
size_t file_record_size = 0;
|
|
|
|
|
|
|
|
ret = parse_local_file_header(
|
2025-03-22 22:41:29 -04:00
|
|
|
ctx,
|
2025-08-14 21:17:46 -04:00
|
|
|
local_file_header_offset,
|
|
|
|
NULL, /* num_files_unzipped */
|
|
|
|
total_file_count + 1, /* file_count */
|
|
|
|
NULL, /* central_header */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
NULL, /* tmpd */
|
|
|
|
1, /* detect_encrypted */
|
|
|
|
NULL, /* zcb */
|
|
|
|
&(zip_catalogue[index]), /* record */
|
|
|
|
&file_record_size); /* file_record_size */
|
2025-02-26 21:12:58 -05:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (file_record_size != 0 && CL_EPARSE != ret) {
|
2025-02-26 21:12:58 -05:00
|
|
|
// Found a record.
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Found a record\n");
|
2025-02-26 21:12:58 -05:00
|
|
|
index++;
|
|
|
|
total_file_count++;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
// increment search_offset by the size of the found local file header + file data
|
|
|
|
// but decrement by 1 to account for the increment at the end of the loop
|
|
|
|
search_offset += file_record_size - 1;
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if (ret == CL_VIRUS) {
|
|
|
|
status = CL_VIRUS;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* stop checking file entries if we'll exceed maxfiles */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (ctx->engine->maxfiles && total_file_count >= ctx->engine->maxfiles) {
|
2025-02-26 21:12:58 -05:00
|
|
|
cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
|
|
|
|
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
|
|
|
|
exceeded_max_files = true; // Set a bool so we can return the correct status code later.
|
|
|
|
// We still need to scan the files we found while reviewing the file records up to this limit.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (num_record_blocks * ZIP_RECORDS_CHECK_BLOCKSIZE == index + 1) {
|
|
|
|
// Filled up the current block of zip records, need to allocate more space to fit additional records.
|
|
|
|
cli_dbgmsg("cli_unzip: Filled a zip record block. Allocating an additional block for more zip records...\n");
|
2025-02-26 21:12:58 -05:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
CLI_MAX_REALLOC_OR_GOTO_DONE(
|
|
|
|
zip_catalogue,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (num_record_blocks + 1),
|
|
|
|
status = CL_EMEM);
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
num_record_blocks++;
|
|
|
|
/* zero out the memory for the new records */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
memset(&(zip_catalogue[index]), 0,
|
|
|
|
sizeof(struct zip_record) * (ZIP_RECORDS_CHECK_BLOCKSIZE * num_record_blocks - index));
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*temp_catalogue = zip_catalogue;
|
2025-03-22 22:41:29 -04:00
|
|
|
*num_records = index;
|
|
|
|
status = CL_SUCCESS;
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
done:
|
|
|
|
if (CL_SUCCESS != status) {
|
|
|
|
if (NULL != zip_catalogue) {
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < index; i++) {
|
|
|
|
if (NULL != zip_catalogue[i].original_filename) {
|
|
|
|
free(zip_catalogue[i].original_filename);
|
|
|
|
zip_catalogue[i].original_filename = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(zip_catalogue);
|
2025-03-22 22:41:29 -04:00
|
|
|
zip_catalogue = NULL;
|
2025-02-26 21:12:58 -05:00
|
|
|
*temp_catalogue = NULL; // zip_catalogue and *temp_catalogue have the same value. Set temp_catalogue to NULL to ensure no use after free
|
|
|
|
}
|
|
|
|
|
|
|
|
if (exceeded_max_files) {
|
|
|
|
status = CL_EMAXFILES;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2025-03-22 22:41:29 -04:00
|
|
|
* @brief Add files not present in the central directory to the catalogue
|
2025-02-26 21:12:58 -05:00
|
|
|
*
|
|
|
|
* This function indexes every file not present in the central directory.
|
2025-03-22 22:41:29 -04:00
|
|
|
* It searches through all the local file headers in the zip file and
|
2025-02-26 21:12:58 -05:00
|
|
|
* adds any that are found that were not already in the catalogue.
|
|
|
|
*
|
|
|
|
* The caller is responsible for freeing the catalogue.
|
|
|
|
* The catalogue may contain duplicate items, which should be skipped.
|
|
|
|
*
|
|
|
|
* @param ctx The scanning context
|
|
|
|
* @param map The file map
|
|
|
|
* @param fsize The file size
|
|
|
|
* @param[in, out] catalogue A catalogue of zip_records found in the central directory.
|
|
|
|
* @param[in, out] num_records The number of records in the catalogue.
|
2025-03-22 22:41:29 -04:00
|
|
|
* @return cl_error_t CL_SUCCESS if no overlapping files
|
2025-02-26 21:12:58 -05:00
|
|
|
* @return cl_error_t CL_VIRUS if overlapping files and heuristic alerts are enabled
|
|
|
|
* @return cl_error_t CL_EFORMAT if overlapping files and heuristic alerts are disabled
|
|
|
|
* @return cl_error_t CL_ETIMEOUT if the scan time limit is exceeded.
|
|
|
|
* @return cl_error_t CL_EMEM for memory allocation errors.
|
|
|
|
*/
|
|
|
|
cl_error_t index_local_file_headers(
|
|
|
|
cli_ctx *ctx,
|
|
|
|
fmap_t *map,
|
|
|
|
uint32_t fsize,
|
|
|
|
struct zip_record **catalogue,
|
|
|
|
size_t *num_records)
|
|
|
|
{
|
2025-03-22 22:41:29 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
uint32_t i = 0;
|
|
|
|
uint32_t start_offset = 0;
|
|
|
|
uint32_t end_offset = 0;
|
|
|
|
size_t total_files_found = 0;
|
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
struct zip_record *temp_catalogue = NULL;
|
2025-02-26 21:12:58 -05:00
|
|
|
struct zip_record *combined_catalogue = NULL;
|
2025-03-22 22:41:29 -04:00
|
|
|
struct zip_record *curr_record = NULL;
|
|
|
|
struct zip_record *next_record = NULL;
|
|
|
|
struct zip_record *prev_record = NULL;
|
|
|
|
size_t local_file_headers_count = 0;
|
|
|
|
uint32_t num_overlapping_files = 0;
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
if (NULL == catalogue || NULL == num_records || NULL == *catalogue) {
|
2025-03-25 15:37:12 -04:00
|
|
|
cli_dbgmsg("index_local_file_headers: Invalid NULL arguments\n");
|
2025-02-26 21:12:58 -05:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
total_files_found = *num_records;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Generate a list of zip records found before, between, and after the zip records already in catalogue
|
|
|
|
* First, scan between the start of the file and the first zip_record offset (or the end of the file if no zip_records have been found)
|
|
|
|
*/
|
|
|
|
if (*num_records == 0) {
|
|
|
|
end_offset = fsize;
|
|
|
|
} else {
|
|
|
|
end_offset = (*catalogue)[0].local_header_offset;
|
|
|
|
}
|
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
ret = index_local_file_headers_within_bounds(
|
|
|
|
ctx,
|
|
|
|
map,
|
|
|
|
fsize,
|
|
|
|
start_offset,
|
|
|
|
end_offset,
|
|
|
|
total_files_found,
|
|
|
|
&temp_catalogue,
|
|
|
|
&local_file_headers_count);
|
2025-02-26 21:12:58 -05:00
|
|
|
if (CL_SUCCESS != ret) {
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
total_files_found += local_file_headers_count;
|
|
|
|
|
|
|
|
/*
|
2025-03-22 22:41:29 -04:00
|
|
|
* Search for zip records between the zip records already in the catalogue
|
2025-02-26 21:12:58 -05:00
|
|
|
*/
|
|
|
|
for (i = 0; i < *num_records; i++) {
|
|
|
|
// Before searching for more files, check if number of found files exceeds maxfiles
|
|
|
|
if (ctx->engine->maxfiles && total_files_found >= ctx->engine->maxfiles) {
|
|
|
|
cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
|
|
|
|
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
curr_record = &((*catalogue)[i]);
|
|
|
|
start_offset = curr_record->local_header_offset + curr_record->local_header_size + curr_record->compressed_size;
|
2025-02-26 21:12:58 -05:00
|
|
|
if (i + 1 == *num_records) {
|
|
|
|
end_offset = fsize;
|
|
|
|
} else {
|
|
|
|
next_record = &((*catalogue)[i + 1]);
|
2025-03-25 15:37:12 -04:00
|
|
|
end_offset = next_record->local_header_offset;
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
ret = index_local_file_headers_within_bounds(
|
|
|
|
ctx,
|
|
|
|
map,
|
|
|
|
fsize,
|
|
|
|
start_offset,
|
|
|
|
end_offset,
|
|
|
|
total_files_found,
|
|
|
|
&temp_catalogue,
|
|
|
|
&local_file_headers_count);
|
2025-02-26 21:12:58 -05:00
|
|
|
if (CL_SUCCESS != ret) {
|
|
|
|
status = ret;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
total_files_found = *num_records + local_file_headers_count;
|
|
|
|
|
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2025-03-22 22:41:29 -04:00
|
|
|
* Combine the zip records already in the catalogue with the recently found zip records
|
2025-02-26 21:12:58 -05:00
|
|
|
* Only do this if new zip records were found
|
2025-03-22 22:41:29 -04:00
|
|
|
*/
|
2025-02-26 21:12:58 -05:00
|
|
|
if (local_file_headers_count > 0) {
|
2025-03-22 22:41:29 -04:00
|
|
|
CLI_CALLOC_OR_GOTO_DONE(
|
|
|
|
combined_catalogue,
|
|
|
|
1,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE * (total_files_found + 1),
|
|
|
|
status = CL_EMEM);
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
// *num_records is the number of already found files
|
2025-03-22 22:41:29 -04:00
|
|
|
// local_file_headers_count is the number of new files found
|
2025-02-26 21:12:58 -05:00
|
|
|
// total_files_found is the sum of both of the above
|
|
|
|
uint32_t temp_catalogue_offset = 0;
|
2025-03-22 22:41:29 -04:00
|
|
|
uint32_t catalogue_offset = 0;
|
2025-02-26 21:12:58 -05:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
for (i = 0; i < total_files_found; i++) {
|
|
|
|
// Conditions in which we add from temp_catalogue: it is the only one left OR
|
|
|
|
if (catalogue_offset >= *num_records ||
|
|
|
|
(temp_catalogue_offset < local_file_headers_count &&
|
|
|
|
temp_catalogue[temp_catalogue_offset].local_header_offset < (*catalogue)[catalogue_offset].local_header_offset)) {
|
2025-02-26 21:12:58 -05:00
|
|
|
// add entry from temp_catalogue into the list
|
|
|
|
combined_catalogue[i] = temp_catalogue[temp_catalogue_offset];
|
|
|
|
temp_catalogue_offset++;
|
|
|
|
} else {
|
|
|
|
// add entry from the catalogue into the list
|
|
|
|
combined_catalogue[i] = (*catalogue)[catalogue_offset];
|
|
|
|
catalogue_offset++;
|
|
|
|
}
|
2025-03-22 22:41:29 -04:00
|
|
|
|
2025-02-26 21:12:58 -05:00
|
|
|
/*
|
|
|
|
* Detect overlapping files.
|
|
|
|
*/
|
|
|
|
if (i > 0) {
|
|
|
|
prev_record = &(combined_catalogue[i - 1]);
|
|
|
|
curr_record = &(combined_catalogue[i]);
|
|
|
|
|
|
|
|
uint32_t prev_record_size = prev_record->local_header_size + prev_record->compressed_size;
|
|
|
|
uint32_t curr_record_size = curr_record->local_header_size + curr_record->compressed_size;
|
|
|
|
uint32_t prev_record_end;
|
|
|
|
uint32_t curr_record_end;
|
|
|
|
|
|
|
|
/* Check for integer overflow in 32bit size & offset values */
|
|
|
|
if ((UINT32_MAX - prev_record_size < prev_record->local_header_offset) ||
|
|
|
|
(UINT32_MAX - curr_record_size < curr_record->local_header_offset)) {
|
|
|
|
cli_dbgmsg("cli_unzip: Integer overflow detected; invalid data sizes in zip file headers.\n");
|
|
|
|
status = CL_EFORMAT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
prev_record_end = prev_record->local_header_offset + prev_record_size;
|
|
|
|
curr_record_end = curr_record->local_header_offset + curr_record_size;
|
|
|
|
|
|
|
|
if (((curr_record->local_header_offset >= prev_record->local_header_offset) && (curr_record->local_header_offset + ZIP_RECORD_OVERLAP_FUDGE_FACTOR < prev_record_end)) ||
|
|
|
|
((prev_record->local_header_offset >= curr_record->local_header_offset) && (prev_record->local_header_offset + ZIP_RECORD_OVERLAP_FUDGE_FACTOR < curr_record_end))) {
|
|
|
|
/* Overlapping file detected */
|
|
|
|
num_overlapping_files++;
|
|
|
|
|
|
|
|
if ((curr_record->local_header_offset == prev_record->local_header_offset) &&
|
|
|
|
(curr_record->local_header_size == prev_record->local_header_size) &&
|
|
|
|
(curr_record->compressed_size == prev_record->compressed_size)) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Ignoring duplicate file entry at offset: 0x%x.\n", curr_record->local_header_offset);
|
2025-02-26 21:12:58 -05:00
|
|
|
} else {
|
|
|
|
cli_dbgmsg("cli_unzip: Overlapping files detected.\n");
|
|
|
|
cli_dbgmsg(" previous file end: %u\n", prev_record_end);
|
|
|
|
cli_dbgmsg(" current file start: %u\n", curr_record->local_header_offset);
|
|
|
|
|
|
|
|
if (ZIP_MAX_NUM_OVERLAPPING_FILES < num_overlapping_files) {
|
2025-10-02 11:46:14 -04:00
|
|
|
status = CL_EFORMAT;
|
2025-02-26 21:12:58 -05:00
|
|
|
if (SCAN_HEURISTICS) {
|
2025-10-02 11:46:14 -04:00
|
|
|
ret = cli_append_potentially_unwanted(ctx, "Heuristics.Zip.OverlappingFiles");
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
|
|
status = ret;
|
|
|
|
}
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(temp_catalogue);
|
|
|
|
temp_catalogue = NULL;
|
2025-10-02 11:46:14 -04:00
|
|
|
|
2025-02-26 21:12:58 -05:00
|
|
|
free(*catalogue);
|
2025-10-02 11:46:14 -04:00
|
|
|
*catalogue = combined_catalogue;
|
|
|
|
combined_catalogue = NULL;
|
|
|
|
|
2025-02-26 21:12:58 -05:00
|
|
|
*num_records = total_files_found;
|
|
|
|
} else {
|
|
|
|
free(temp_catalogue);
|
|
|
|
temp_catalogue = NULL;
|
|
|
|
}
|
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
status = CL_SUCCESS;
|
2025-02-26 21:12:58 -05:00
|
|
|
|
|
|
|
done:
|
|
|
|
if (CL_SUCCESS != status) {
|
|
|
|
if (NULL != *catalogue) {
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < (total_files_found - local_file_headers_count); i++) {
|
|
|
|
if (NULL != (*catalogue)[i].original_filename) {
|
|
|
|
free((*catalogue)[i].original_filename);
|
|
|
|
(*catalogue)[i].original_filename = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
free(*catalogue);
|
|
|
|
*catalogue = NULL;
|
|
|
|
}
|
2025-10-02 11:46:14 -04:00
|
|
|
}
|
2025-02-26 21:12:58 -05:00
|
|
|
|
2025-10-02 11:46:14 -04:00
|
|
|
if (NULL != temp_catalogue) {
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < local_file_headers_count; i++) {
|
|
|
|
if (NULL != temp_catalogue[i].original_filename) {
|
|
|
|
free(temp_catalogue[i].original_filename);
|
|
|
|
temp_catalogue[i].original_filename = NULL;
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
}
|
2025-10-02 11:46:14 -04:00
|
|
|
free(temp_catalogue);
|
|
|
|
temp_catalogue = NULL;
|
|
|
|
}
|
2025-02-26 21:12:58 -05:00
|
|
|
|
2025-10-02 11:46:14 -04:00
|
|
|
if (NULL != combined_catalogue) {
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < total_files_found; i++) {
|
|
|
|
if (NULL != combined_catalogue[i].original_filename) {
|
|
|
|
free(combined_catalogue[i].original_filename);
|
|
|
|
combined_catalogue[i].original_filename = NULL;
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
}
|
2025-10-02 11:46:14 -04:00
|
|
|
free(combined_catalogue);
|
|
|
|
combined_catalogue = NULL;
|
2025-02-26 21:12:58 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
/**
|
|
|
|
* @brief Find the central directory header in a zip file.
|
|
|
|
*
|
|
|
|
* Find the central directory header, first by finding the End Of Central Directory header.
|
|
|
|
*
|
|
|
|
* The End Of Central Directory header is located at the end of the zip file and contains the offset of the central
|
|
|
|
* directory and ends with a variable length comment.
|
|
|
|
* We'll start searching for the magic bytes SIZEOF_END_OF_CENTRAL bytes from the end of the file, and work our way
|
|
|
|
* backwards until we find the End Of Central Directory header magic bytes.
|
|
|
|
*
|
|
|
|
* @param map The file map
|
|
|
|
* @param fsize The file size
|
|
|
|
* @param[out] coff The central directory offset
|
|
|
|
* @return cl_error_t
|
|
|
|
*/
|
|
|
|
static cl_error_t find_central_directory_header(
|
|
|
|
fmap_t *map,
|
|
|
|
uint32_t fsize,
|
|
|
|
uint32_t *coff)
|
|
|
|
{
|
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
uint32_t eocoff = 0;
|
|
|
|
|
|
|
|
cli_dbgmsg("find_central_directory_header: Searching for End Of Central Directory header...\n");
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the End Of Central Directory header.
|
|
|
|
*/
|
|
|
|
for (eocoff = fsize - SIZEOF_END_OF_CENTRAL; eocoff > 0; eocoff--) {
|
|
|
|
const char *eocptr = fmap_need_off_once(
|
|
|
|
map,
|
|
|
|
eocoff,
|
|
|
|
SIZEOF_END_OF_CENTRAL - 2 /* -2 because don't need to read the comment length */);
|
|
|
|
if (!eocptr) {
|
|
|
|
// Failed to get a pointer within the file at that offset and size.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cli_readint32(eocptr) == ZIP_MAGIC_CENTRAL_DIRECTORY_RECORD_END) {
|
|
|
|
// Found the End Of Central Directory header.
|
|
|
|
// Use it to find the central directory offset.
|
|
|
|
cli_dbgmsg("find_central_directory_header: Found End Of Central Directory header at offset: 0x%x. "
|
|
|
|
"Searching for Central Directory header...\n",
|
|
|
|
eocoff);
|
|
|
|
|
|
|
|
// The offset for the Central Directory header is stored at offset 16 in the End Of Central Directory header.
|
|
|
|
uint32_t maybe_coff = cli_readint32(&eocptr[16]);
|
|
|
|
|
|
|
|
if (!CLI_ISCONTAINED_0_TO(fsize, maybe_coff, SIZEOF_CENTRAL_HEADER)) {
|
|
|
|
// The alleged central directory offset + size of the header is not within the file size.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Found it.
|
|
|
|
cli_dbgmsg("find_central_directory_header: Found Central Directory header at offset: 0x%x\n", maybe_coff);
|
|
|
|
*coff = maybe_coff;
|
|
|
|
status = CL_SUCCESS;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CL_SUCCESS != status) {
|
|
|
|
cli_dbgmsg("find_central_directory_header: Central directory header not found.\n");
|
|
|
|
status = CL_EPARSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
return status;
|
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
cl_error_t cli_unzip(cli_ctx *ctx)
|
2018-12-03 12:40:13 -05:00
|
|
|
{
|
2025-03-22 22:41:29 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t num_files_unzipped = 0;
|
|
|
|
uint32_t fsize;
|
|
|
|
uint32_t coff = 0;
|
|
|
|
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
fmap_t *map = ctx->fmap;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
char *tmpd = NULL;
|
|
|
|
|
2024-03-25 13:01:46 -04:00
|
|
|
int toval = 0;
|
2019-09-08 23:17:43 -04:00
|
|
|
struct zip_record *zip_catalogue = NULL;
|
|
|
|
size_t records_count = 0;
|
|
|
|
size_t i;
|
2007-12-13 19:47:07 +00:00
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("in cli_unzip\n");
|
|
|
|
fsize = (uint32_t)map->len;
|
|
|
|
if (sizeof(off_t) != sizeof(uint32_t) && (size_t)fsize != map->len) {
|
|
|
|
cli_dbgmsg("cli_unzip: file too big\n");
|
2025-03-22 22:41:29 -04:00
|
|
|
status = CL_SUCCESS;
|
2019-08-30 14:48:57 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-07-11 20:20:48 -04:00
|
|
|
if (fsize < SIZEOF_CENTRAL_HEADER) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: file too short\n");
|
2025-03-22 22:41:29 -04:00
|
|
|
status = CL_SUCCESS;
|
2019-08-30 14:48:57 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
/*
|
|
|
|
* Find the central directory header
|
|
|
|
*/
|
|
|
|
ret = find_central_directory_header(
|
|
|
|
map,
|
|
|
|
fsize,
|
|
|
|
&coff);
|
|
|
|
if (CL_SUCCESS == ret) {
|
|
|
|
cli_dbgmsg("cli_unzip: central directory header offset: 0x%x\n", coff);
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2019-09-08 23:17:43 -04:00
|
|
|
/*
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
* Index the central directory.
|
2019-09-08 23:17:43 -04:00
|
|
|
*/
|
|
|
|
ret = index_the_central_directory(
|
|
|
|
ctx,
|
|
|
|
coff,
|
|
|
|
&zip_catalogue,
|
|
|
|
&records_count);
|
2025-02-26 21:12:58 -05:00
|
|
|
if (CL_SUCCESS != ret) {
|
2025-03-22 22:41:29 -04:00
|
|
|
cli_dbgmsg("index_central_dir_failed, must rely purely on local file headers\n");
|
2025-02-26 21:12:58 -05:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
CLI_CALLOC_OR_GOTO_DONE(
|
|
|
|
zip_catalogue,
|
|
|
|
1,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE,
|
|
|
|
status = CL_EMEM);
|
|
|
|
|
|
|
|
records_count = 0;
|
2019-08-30 14:48:57 -04:00
|
|
|
}
|
2025-03-22 22:41:29 -04:00
|
|
|
} else {
|
|
|
|
cli_dbgmsg("cli_unzip: central directory header not found, must rely purely on local file headers\n");
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
CLI_CALLOC_OR_GOTO_DONE(
|
|
|
|
zip_catalogue,
|
|
|
|
1,
|
|
|
|
sizeof(struct zip_record) * ZIP_RECORDS_CHECK_BLOCKSIZE,
|
|
|
|
status = CL_EMEM);
|
2019-09-08 23:17:43 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
records_count = 0;
|
|
|
|
}
|
2019-09-08 23:17:43 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
/*
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
* Add local file headers not referenced by the central directory.
|
2025-03-22 22:41:29 -04:00
|
|
|
*/
|
|
|
|
ret = index_local_file_headers(
|
|
|
|
ctx,
|
|
|
|
map,
|
|
|
|
fsize,
|
|
|
|
&zip_catalogue,
|
|
|
|
&records_count);
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
|
|
cli_dbgmsg("index_local_file_headers_failed\n");
|
|
|
|
status = ret;
|
|
|
|
goto done;
|
|
|
|
}
|
2019-09-08 23:17:43 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
/*
|
|
|
|
* Then decrypt/unzip & scan each unique file entry.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < records_count; i++) {
|
|
|
|
const uint8_t *compressed_data = NULL;
|
2019-09-08 23:17:43 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
if ((i > 0) &&
|
|
|
|
(zip_catalogue[i].local_header_offset == zip_catalogue[i - 1].local_header_offset) &&
|
|
|
|
(zip_catalogue[i].local_header_size == zip_catalogue[i - 1].local_header_size) &&
|
|
|
|
(zip_catalogue[i].compressed_size == zip_catalogue[i - 1].compressed_size)) {
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
/* Duplicate file entry, skip. */
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Skipping unzipping of duplicate file entry at offset: 0x%x\n", zip_catalogue[i].local_header_offset);
|
2025-03-22 22:41:29 -04:00
|
|
|
continue;
|
|
|
|
}
|
2019-07-11 22:14:39 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
// Get a pointer to the compressed data, is just after the local header.
|
|
|
|
compressed_data = fmap_need_off(
|
|
|
|
map,
|
|
|
|
zip_catalogue[i].local_header_offset + zip_catalogue[i].local_header_size,
|
|
|
|
zip_catalogue[i].compressed_size);
|
|
|
|
|
|
|
|
if (zip_catalogue[i].encrypted) {
|
|
|
|
if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size)) {
|
|
|
|
status = zdecrypt(
|
|
|
|
compressed_data,
|
|
|
|
zip_catalogue[i].compressed_size,
|
|
|
|
zip_catalogue[i].uncompressed_size,
|
|
|
|
fmap_need_off(map, zip_catalogue[i].local_header_offset, SIZEOF_LOCAL_HEADER),
|
|
|
|
&num_files_unzipped,
|
|
|
|
ctx,
|
|
|
|
tmpd,
|
|
|
|
zip_scan_cb,
|
|
|
|
zip_catalogue[i].original_filename);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
} else {
|
|
|
|
// If we cannot get a pointer to the compressed data, we cannot decrypt it.
|
|
|
|
// Skip this file.
|
|
|
|
cli_dbgmsg("cli_unzip: Skipping decryption of file entry at offset: 0x%x, size: %u, compressed data not available\n",
|
|
|
|
zip_catalogue[i].local_header_offset,
|
|
|
|
zip_catalogue[i].compressed_size);
|
2019-08-16 17:18:59 -07:00
|
|
|
}
|
2025-03-22 22:41:29 -04:00
|
|
|
} else {
|
|
|
|
if (fmap_need_ptr_once(map, compressed_data, zip_catalogue[i].compressed_size)) {
|
|
|
|
status = unz(
|
|
|
|
compressed_data,
|
|
|
|
zip_catalogue[i].compressed_size,
|
|
|
|
zip_catalogue[i].uncompressed_size,
|
|
|
|
zip_catalogue[i].method,
|
|
|
|
zip_catalogue[i].flags,
|
|
|
|
&num_files_unzipped,
|
|
|
|
ctx,
|
|
|
|
tmpd,
|
|
|
|
zip_scan_cb,
|
|
|
|
zip_catalogue[i].original_filename,
|
|
|
|
false);
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
} else {
|
|
|
|
// If we cannot get a pointer to the compressed data, we cannot decompress it.
|
|
|
|
// Skip this file.
|
|
|
|
cli_dbgmsg("cli_unzip: Skipping decompression of file entry at offset: 0x%x, size: %u, compressed data not available\n",
|
|
|
|
zip_catalogue[i].local_header_offset,
|
|
|
|
zip_catalogue[i].compressed_size);
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2025-03-22 22:41:29 -04:00
|
|
|
}
|
2024-03-25 13:01:46 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
if (ctx->engine->maxfiles && num_files_unzipped >= ctx->engine->maxfiles) {
|
|
|
|
// Note: this check piggybacks on the MaxFiles setting, but is not actually
|
|
|
|
// scanning these files or incrementing the ctx->scannedfiles count
|
|
|
|
// This check is also redundant. zip_scan_cb == cli_magic_scan_desc,
|
|
|
|
// so we will also check and update the limits for the actual number of scanned
|
|
|
|
// files inside cli_magic_scan()
|
|
|
|
cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
|
|
|
|
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
|
|
|
|
status = CL_EMAXFILES;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
goto done;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2019-08-30 14:48:57 -04:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
if (cli_checktimelimit(ctx) != CL_SUCCESS) {
|
|
|
|
cli_dbgmsg("cli_unzip: Time limit reached (max: %u)\n", ctx->engine->maxscantime);
|
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
|
|
|
|
status = CL_ETIMEOUT;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
goto done;
|
2025-03-22 22:41:29 -04:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
if (ctx->abort_scan) {
|
|
|
|
// The scan was aborted, stop processing files.
|
|
|
|
// This also takes into account CL_VIRUS status (to abort on detection when not in allmatch mode).
|
2025-03-22 22:41:29 -04:00
|
|
|
break;
|
|
|
|
}
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
|
|
|
|
// Continue to the next file entry even if the current one failed.
|
2019-08-30 14:48:57 -04:00
|
|
|
}
|
2022-08-27 10:30:51 -07:00
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
done:
|
2019-09-08 23:17:43 -04:00
|
|
|
|
|
|
|
if (NULL != zip_catalogue) {
|
2020-03-19 21:23:54 -04:00
|
|
|
/* Clean up zip record resources */
|
|
|
|
for (i = 0; i < records_count; i++) {
|
|
|
|
if (NULL != zip_catalogue[i].original_filename) {
|
|
|
|
free(zip_catalogue[i].original_filename);
|
|
|
|
zip_catalogue[i].original_filename = NULL;
|
|
|
|
}
|
|
|
|
}
|
2019-09-08 23:17:43 -04:00
|
|
|
free(zip_catalogue);
|
|
|
|
zip_catalogue = NULL;
|
|
|
|
}
|
|
|
|
|
2019-08-30 14:48:57 -04:00
|
|
|
if (NULL != tmpd) {
|
|
|
|
if (!ctx->engine->keeptmp) {
|
|
|
|
cli_rmdirs(tmpd);
|
|
|
|
}
|
|
|
|
free(tmpd);
|
|
|
|
}
|
2007-12-13 19:47:07 +00:00
|
|
|
|
2025-03-22 22:41:29 -04:00
|
|
|
return status;
|
2007-12-13 19:47:07 +00:00
|
|
|
}
|
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
cl_error_t unzip_single_internal(cli_ctx *ctx, size_t local_header_offset, zip_cb zcb)
|
2014-04-28 12:45:51 -04:00
|
|
|
{
|
2025-03-22 22:41:29 -04:00
|
|
|
cl_error_t ret = CL_SUCCESS;
|
2019-07-11 20:20:48 -04:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t num_files_unzipped = 0;
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
cli_dbgmsg("in cli_unzip_single\n");
|
2019-07-11 20:20:48 -04:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (NULL == ctx || NULL == ctx->fmap) {
|
|
|
|
cli_dbgmsg("cli_unzip_single: Invalid NULL arguments\n");
|
|
|
|
return CL_ENULLARG;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
2025-08-14 21:17:46 -04:00
|
|
|
|
|
|
|
if (local_header_offset + SIZEOF_LOCAL_HEADER > ctx->fmap->len) {
|
2018-12-03 12:40:13 -05:00
|
|
|
cli_dbgmsg("cli_unzip: file too short\n");
|
2025-03-22 22:41:29 -04:00
|
|
|
return CL_SUCCESS;
|
2018-12-03 12:40:13 -05:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
ret = parse_local_file_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
ctx,
|
2025-03-22 22:41:29 -04:00
|
|
|
local_header_offset,
|
|
|
|
&num_files_unzipped,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
0, /* file_count */
|
|
|
|
NULL, /* central_header*/
|
|
|
|
NULL, /* tmpd */
|
|
|
|
0, /* detect_encrypted */
|
2025-03-22 22:41:29 -04:00
|
|
|
zcb,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
NULL, /* record */
|
|
|
|
NULL); /* file_record_size */
|
2018-12-03 12:40:13 -05:00
|
|
|
|
|
|
|
return ret;
|
2007-12-13 19:47:07 +00:00
|
|
|
}
|
2014-04-28 12:45:51 -04:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
cl_error_t cli_unzip_single(cli_ctx *ctx, size_t local_header_offset)
|
2018-12-03 12:40:13 -05:00
|
|
|
{
|
2019-07-11 20:20:48 -04:00
|
|
|
return unzip_single_internal(ctx, local_header_offset, zip_scan_cb);
|
2014-04-28 12:45:51 -04:00
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
cl_error_t unzip_search_add(struct zip_requests *requests, const char *name, size_t nlen)
|
2014-10-14 17:15:18 -04:00
|
|
|
{
|
|
|
|
cli_dbgmsg("in unzip_search_add\n");
|
|
|
|
|
|
|
|
if (requests->namecnt >= MAX_ZIP_REQUESTS) {
|
|
|
|
cli_dbgmsg("DEBUGGING MESSAGE GOES HERE!\n");
|
|
|
|
return CL_BREAK;
|
|
|
|
}
|
|
|
|
|
|
|
|
cli_dbgmsg("unzip_search_add: adding %s (len %llu)\n", name, (long long unsigned)nlen);
|
|
|
|
|
2018-12-03 12:40:13 -05:00
|
|
|
requests->names[requests->namecnt] = name;
|
2014-10-14 17:15:18 -04:00
|
|
|
requests->namelens[requests->namecnt] = nlen;
|
|
|
|
requests->namecnt++;
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
cl_error_t unzip_search(cli_ctx *ctx, struct zip_requests *requests)
|
2014-04-28 12:45:51 -04:00
|
|
|
{
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
cl_error_t ret;
|
|
|
|
size_t file_count = 0;
|
2025-08-14 21:17:46 -04:00
|
|
|
uint32_t coff = 0;
|
|
|
|
uint32_t toval = 0;
|
2024-03-25 13:01:46 -04:00
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
size_t file_record_size = 0;
|
|
|
|
|
2014-04-28 12:45:51 -04:00
|
|
|
cli_dbgmsg("in unzip_search\n");
|
2014-10-14 17:15:18 -04:00
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (NULL == ctx || NULL == ctx->fmap) {
|
2014-04-28 12:45:51 -04:00
|
|
|
return CL_ENULLARG;
|
|
|
|
}
|
|
|
|
|
2025-08-14 21:17:46 -04:00
|
|
|
if (ctx->fmap->len < SIZEOF_CENTRAL_HEADER) {
|
2014-04-28 12:45:51 -04:00
|
|
|
cli_dbgmsg("unzip_search: file too short\n");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_SUCCESS;
|
|
|
|
goto done;
|
2014-04-28 12:45:51 -04:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
/*
|
|
|
|
* Find the central directory header
|
|
|
|
*/
|
|
|
|
ret = find_central_directory_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
ctx->fmap,
|
|
|
|
ctx->fmap->len,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
&coff);
|
|
|
|
if (CL_SUCCESS == ret) {
|
|
|
|
uint32_t central_file_header_offset = coff;
|
|
|
|
cli_dbgmsg("unzip_search: central directory header offset: 0x%x\n", central_file_header_offset);
|
|
|
|
do {
|
|
|
|
ret = parse_central_directory_file_header(
|
2025-08-14 21:17:46 -04:00
|
|
|
ctx,
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
central_file_header_offset,
|
|
|
|
NULL, /* num_files_unzipped */
|
|
|
|
file_count + 1,
|
|
|
|
NULL, /* tmpd */
|
|
|
|
requests,
|
|
|
|
NULL, /* record */
|
|
|
|
&file_record_size);
|
2014-04-28 12:45:51 -04:00
|
|
|
|
2014-10-14 17:15:18 -04:00
|
|
|
if (requests->match) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
// Found a match.
|
|
|
|
status = CL_VIRUS;
|
|
|
|
goto done;
|
2014-04-28 12:45:51 -04:00
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
file_count++;
|
|
|
|
if (ctx && ctx->engine->maxfiles && file_count >= ctx->engine->maxfiles) {
|
libclamav: Fix scan recursion tracking
Scan recursion is the process of identifying files embedded in other
files and then scanning them, recursively.
Internally this process is more complex than it may sound because a file
may have multiple layers of types before finding a new "file".
At present we treat the recursion count in the scanning context as an
index into both our fmap list AND our container list. These two lists
are conceptually a part of the same thing and should be unified.
But what's concerning is that the "recursion level" isn't actually
incremented or decremented at the same time that we add a layer to the
fmap or container lists but instead is more touchy-feely, increasing
when we find a new "file".
To account for this shadiness, the size of the fmap and container lists
has always been a little longer than our "max scan recursion" limit so
we don't accidentally overflow the fmap or container arrays (!).
I've implemented a single recursion-stack as an array, similar to before,
which includes a pointer to each fmap at each layer, along with the size
and type. Push and pop functions add and remove layers whenever a new
fmap is added. A boolean argument when pushing indicates if the new layer
represents a new buffer or new file (descriptor). A new buffer will reset
the "nested fmap level" (described below).
This commit also provides a solution for an issue where we detect
embedded files more than once during scan recursion.
For illustration, imagine a tarball named foo.tar.gz with this structure:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| baz.exe | PE | 0 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| │  └── hello.txt | ASCII | 2 | 0 |
| └── sfx.7z | 7Z | 1 | 1 |
|   └── world.txt | ASCII | 2 | 0 |
(A) If we scan for embedded files at any layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| ├── foo.tar | TAR | 1 | 0 |
| │ ├── bar.zip | ZIP | 2 | 1 |
| │ │  └── hola.txt | ASCII | 3 | 0 |
| │ ├── baz.exe | PE | 2 | 1 |
| │ │ ├── sfx.zip | ZIP | 3 | 1 |
| │ │ │  └── hello.txt | ASCII | 4 | 0 |
| │ │ └── sfx.7z | 7Z | 3 | 1 |
| │ │   └── world.txt | ASCII | 4 | 0 |
| │ ├── sfx.zip | ZIP | 2 | 1 |
| │ │  └── hello.txt | ASCII | 3 | 0 |
| │ └── sfx.7z | 7Z | 2 | 1 |
| │  └── world.txt | ASCII | 3 | 0 |
| ├── sfx.zip | ZIP | 1 | 1 |
| └── sfx.7z | 7Z | 1 | 1 |
(A) is bad because it scans content more than once.
Note that for the GZ layer, it may detect the ZIP and 7Z if the
signature hits on the compressed data, which it might, though
extracting the ZIP and 7Z will likely fail.
The reason the above doesn't happen now is that we restrict embedded
type scans for a bunch of archive formats to include GZ and TAR.
(B) If we scan for embedded files at the foo.tar layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| ├── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 2 | 1 |
| │  └── hello.txt | ASCII | 3 | 0 |
| └── sfx.7z | 7Z | 2 | 1 |
|   └── world.txt | ASCII | 3 | 0 |
(B) is almost right. But we can achieve it easily enough only scanning for
embedded content in the current fmap when the "nested fmap level" is 0.
The upside is that it should safely detect all embedded content, even if
it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe.
The biggest risk I can think of affects ZIPs. SFXZIP detection
is identical to ZIP detection, which is why we don't allow SFXZIP to be
detected if insize of a ZIP. If we only allow embedded type scanning at
fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP
if the bar.exe was not compressed in foo.zip and if non-compressed files
extracted from ZIPs aren't extracted as new buffers:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.zip | ZIP | 0 | 0 |
| └── bar.exe | PE | 1 | 1 |
| └── sfx.zip | ZIP | 2 | 2 |
Provided that we ensure all files extracted from zips are scanned in
new buffers, option (B) should be safe.
(C) If we scan for embedded files at the baz.exe layer, we may detect:
| description | type | rec level | nested fmap level |
| ------------------------- | ----- | --------- | ----------------- |
| foo.tar.gz | GZ | 0 | 0 |
| └── foo.tar | TAR | 1 | 0 |
| ├── bar.zip | ZIP | 2 | 1 |
| │  └── hola.txt | ASCII | 3 | 0 |
| └── baz.exe | PE | 2 | 1 |
| ├── sfx.zip | ZIP | 3 | 1 |
| │  └── hello.txt | ASCII | 4 | 0 |
| └── sfx.7z | 7Z | 3 | 1 |
|   └── world.txt | ASCII | 4 | 0 |
(C) is right. But it's harder to achieve. For this example we can get it by
restricting 7ZSFX and ZIPSFX detection only when scanning an executable.
But that may mean losing detection of archives embedded elsewhere.
And we'd have to identify allowable container types for each possible
embedded type, which would be very difficult.
So this commit aims to solve the issue the (B)-way.
Note that in all situations, we still have to scan with file typing
enabled to determine if we need to reassign the current file type, such
as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-
compressed. Detection of DMG and a handful of other types rely on
finding data partway through or near the ned of a file before
reassigning the entire file as the new type.
Other fixes and considerations in this commit:
- The utf16 HTML parser has weak error handling, particularly with respect
to creating a nested fmap for scanning the ascii decoded file.
This commit cleans up the error handling and wraps the nested scan with
the recursion-stack push()/pop() for correct recursion tracking.
Before this commit, each container layer had a flag to indicate if the
container layer is valid.
We need something similar so that the cli_recursion_stack_get_*()
functions ignore normalized layers. Details...
Imagine an LDB signature for HTML content that specifies a ZIP
container. If the signature actually alerts on the normalized HTML and
you don't ignore normalized layers for the container check, it will
appear as though the alert is in an HTML container rather than a ZIP
container.
This commit accomplishes this with a boolean you set in the scan context
before scanning a new layer. Then when the new fmap is created, it will
use that flag to set similar flag for the layer. The context flag is
reset those that anything after this doesn't have that flag.
The flag allows the new recursion_stack_get() function to ignore
normalized layers when iterating the stack to return a layer at a
requested index, negative or positive.
Scanning normalized extracted/normalized javascript and VBA should also
use the 'layer is normalized' flag.
- This commit also fixes Heuristic.Broken.Executable alert for ELF files
to make sure that:
A) these only alert if cli_append_virus() returns CL_VIRUS (aka it
respects the FP check).
B) all broken-executable alerts for ELF only happen if the
SCAN_HEURISTIC_BROKEN option is enabled.
- This commit also cleans up the error handling in cli_magic_scan_dir().
This was needed so we could correctly apply the layer-is-normalized-flag
to all VBA macros extracted to a directory when scanning the directory.
- Also fix an issue where exceeding scan maximums wouldn't cause embedded
file detection scans to abort. Granted we don't actually want to abort
if max filesize or max recursion depth are exceeded... only if max
scansize, max files, and max scantime are exceeded.
Add 'abort_scan' flag to scan context, to protect against depending on
correct error propagation for fatal conditions. Instead, setting this
flag in the scan context should guarantee that a fatal condition deep in
scan recursion isn't lost which result in more stuff being scanned
instead of aborting. This shouldn't be necessary, but some status codes
like CL_ETIMEOUT never used to be fatal and it's easier to do this than
to verify every parser only returns CL_ETIMEOUT and other "fatal
status codes" in fatal conditions.
- Remove duplicate is_tar() prototype from filestypes.c and include
is_tar.h instead.
- Presently we create the fmap hash when creating the fmap.
This wastes a bit of CPU if the hash is never needed.
Now that we're creating fmap's for all embedded files discovered with
file type recognition scans, this is a much more frequent occurence and
really slows things down.
This commit fixes the issue by only creating fmap hashes as needed.
This should not only resolve the perfomance impact of creating fmap's
for all embedded files, but also should improve performance in general.
- Add allmatch check to the zip parser after the central-header meta
match. That way we don't multiple alerts with the same match except in
allmatch mode. Clean up error handling in the zip parser a tiny bit.
- Fixes to ensure that the scan limits such as scansize, filesize,
recursion depth, # of embedded files, and scantime are always reported
if AlertExceedsMax (--alert-exceeds-max) is enabled.
- Fixed an issue where non-fatal alerts for exceeding scan maximums may
mask signature matches later on. I changed it so these alerts use the
"possibly unwanted" alert-type and thus only alert if no other alerts
were found or if all-match or heuristic-precedence are enabled.
- Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata
when the --gen-json feature is enabled. These will show up once under
"ParseErrors" the first time a limit is exceeded. In the present
implementation, only one limits-exceeded events will be added, so as to
prevent a malicious or malformed sample from filling the JSON buffer
with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
|
|
|
// Note: this check piggybacks on the MaxFiles setting, but is not actually
|
|
|
|
// scanning these files or incrementing the ctx->scannedfiles count
|
2014-04-28 12:45:51 -04:00
|
|
|
cli_dbgmsg("cli_unzip: Files limit reached (max: %u)\n", ctx->engine->maxfiles);
|
2022-08-19 16:21:42 -07:00
|
|
|
cli_append_potentially_unwanted_if_heur_exceedsmax(ctx, "Heuristics.Limits.Exceeded.MaxFiles");
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_EMAXFILES;
|
|
|
|
goto done;
|
2014-04-28 12:45:51 -04:00
|
|
|
}
|
2024-03-25 13:01:46 -04:00
|
|
|
|
2014-10-14 17:15:18 -04:00
|
|
|
if (ctx && cli_json_timeout_cycle_check(ctx, (int *)(&toval)) != CL_SUCCESS) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_ETIMEOUT;
|
|
|
|
goto done;
|
2014-06-25 13:08:18 -04:00
|
|
|
}
|
2025-10-09 17:40:14 -04:00
|
|
|
|
|
|
|
// Increment to the next central file header.
|
|
|
|
central_file_header_offset += file_record_size;
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
} while ((ret == CL_SUCCESS) && (file_record_size > 0));
|
2014-04-28 12:45:51 -04:00
|
|
|
} else {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cli_dbgmsg("unzip_search: Cannot locate central directory. unzip_search failed.\n");
|
|
|
|
status = CL_EPARSE;
|
|
|
|
goto done;
|
2014-04-28 12:45:51 -04:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
done:
|
|
|
|
return status;
|
2014-04-28 12:45:51 -04:00
|
|
|
}
|
|
|
|
|
2019-07-11 20:20:48 -04:00
|
|
|
cl_error_t unzip_search_single(cli_ctx *ctx, const char *name, size_t nlen, uint32_t *loff)
|
2014-10-14 17:15:18 -04:00
|
|
|
{
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
cl_error_t status = CL_ERROR;
|
|
|
|
struct zip_requests requests = {0};
|
2014-10-14 17:15:18 -04:00
|
|
|
|
|
|
|
cli_dbgmsg("in unzip_search_single\n");
|
|
|
|
if (!ctx) {
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
status = CL_ENULLARG;
|
|
|
|
goto done;
|
2014-10-14 17:15:18 -04:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
// Add the file name to the requests.
|
|
|
|
status = unzip_search_add(&requests, name, nlen);
|
|
|
|
if (CL_SUCCESS != status) {
|
|
|
|
cli_dbgmsg("unzip_search_single: Failed to add file name to requests\n");
|
|
|
|
goto done;
|
2014-10-14 17:15:18 -04:00
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
// Search for the zip file entry in the current layer.
|
2025-08-14 21:17:46 -04:00
|
|
|
status = unzip_search(ctx, &requests);
|
2025-10-09 21:27:18 -04:00
|
|
|
if (CL_VIRUS == status) {
|
2014-10-14 17:15:18 -04:00
|
|
|
*loff = requests.loff;
|
|
|
|
}
|
|
|
|
|
ZIP: Fix infinite loop + significant code cleanup
An infinite loop may occur when scanning some malformed ZIP files.
I introduced this issue in 96c00b6d80a4cb16cb2d39111614733e4a62221d
with this line:
```c
// decrement coff by 1 to account for the increment at the end of the loop
coff -= 1;
```
The problem is that the function may return 0, which should
indicate that there are no more files. The result was that
`coff` would stay the same and the loop would repeat.
This issue is in 1.5 development and affects the 1.5.0 beta but
does not affect any production versions.
Fixes: https://github.com/Cisco-Talos/clamav/issues/1534
Special thanks to Sophie0x2E for an initial fix, proposed in
https://github.com/Cisco-Talos/clamav/pull/1539
In review, I was uncomfortable with other existing code and
decided to to a more significant overhaul of the error handling
in the ZIP module.
In addition to cleanup, this commit has some functional changes:
- When parsing a central directory file header inside of
`parse_central_directory_file_header()`, it will now fail out if the
"extra length" or "comment length" fields would exceced the length of
the archive. That doesn't mean the associated local file header won't
be parsed later, but it won't use the central directory file header
to find it. Instead, the ZIP module will have to find the local file
header by searching for extra records not listed in the central directory.
This change was mostly to tidy up complex error handling.
- Add two FTM new signatures to identify split ZIP archives.
This signature identifies the first segment (first file) in a split or
spanned ZIP archive. It may also be found on a single-segment "split"
archive, depending on the ZIP archiver.
```
0:0:504b0708504b0304:ZIP (First segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Practically speaking, this new signature makes it so ClamAV identifies
the file as a ZIP right away without having to rely on SFX_ZIP detection.
Extraction is then handled by the ZIP `cli_unzip` function rather than
extracting each with `cli_unzip_single` which handles SFX_ZIP entries.
Note: ClamAV isn't capable of finding additional files on disk to support
handling the additional segments. So it doesn't make any difference with
handling those other files.
This signature is for single-segment split/spanned archives, depending
on the ZIP archiver.
```
0:0:504b0303504b0304:ZIP (Single-segment split/spanned):CL_TYPE_ANY:CL_TYPE_ZIP
```
Like the first one, this also means we won't rely on SFX_ZIP detection
and will treat this files as regular ZIPs.
- Added a test file to verify that ClamAV can extract a single-file
"split" ZIP.
- Added a clamscan test with test files to verify that scanning a split
archive across two segments correctly extracts the properly formed zip
file entries. Sadly, we can't join the segments to extract everything.
2025-08-04 22:50:48 -04:00
|
|
|
done:
|
|
|
|
return status;
|
2014-10-14 17:15:18 -04:00
|
|
|
}
|