clamav/libclamav/jpeg.c

706 lines
28 KiB
C
Raw Normal View History

2011-04-05 16:33:38 +02:00
/*
2024-01-12 17:03:59 -05:00
* Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2011-2013 Sourcefire, Inc.
2011-04-05 16:33:38 +02:00
*
* Authors: Tomasz Kojm <tkojm@clamav.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/stat.h>
#ifdef HAVE_UNISTD_H
2011-04-05 16:33:38 +02:00
#include <unistd.h>
#endif
#include <stdbool.h>
2011-04-05 16:33:38 +02:00
#include <time.h>
#include "jpeg.h"
#include "clamav.h"
#include "scanners.h"
2011-04-05 16:33:38 +02:00
// clang-format off
/*
* JPEG format highlights
* ----------------------
*
* Links:
* - https://en.wikipedia.org/wiki/JPEG#Syntax_and_structure
* - https://en.wikipedia.org/wiki/JPEG_File_Interchange_Format
* - https://en.wikipedia.org/wiki/Exif
*
* A JPEG image is a sequence of segments.
*
* Each segment starts with a two-byte marker. The first byte is 0xff and is
* followed by one of the following to identify the segment.
* Some segments are simply the 2-byte marker, while others have a payload.
* Realistically it appears that just the start-of-image and end-of-image lack
* the 2-byte size field, the rest have it, even the 4-byte DRI segment.
*
* All variable-byte payloads have 2-bytes indicating the size which includes
* the 2-bytes (but not the marker itself).
*
* Within entropy-encoded (compressed) data, any 0xff will have an 0x00
* inserted after it to indicate that it's just an 0xff and _NOT_ a segment
* marker. Decoders skip the 0x00 byte.
* This only applies to entropy-encoded data, not to marker payload data.
* We don't really worry about this though because this parser stops when it
* reaches the image data.
*/
/*
* JPEG Segment & Entropy Markers.
*/
typedef enum {
    /* Start of Image (SOI)
     * No payload
     */
    JPEG_MARKER_SEGMENT_SOI_START_OF_IMAGE = 0xD8,

    /*
     * Start of Frame (SOFn) markers.
     * All have a variable size payload that specifies the width, height,
     * number of components, and component subsampling.
     *
     * NOTE: The identifiers below are kept exactly as they were for
     * compatibility with the rest of the file. Where an identifier's wording
     * disagrees with ITU-T T.81 Table B.1, the comment gives the T.81 meaning.
     */

    /* SOF0: Baseline DCT, Huffman coding. */
    JPEG_MARKER_SEGMENT_S0F0_START_OF_FRAME_BASELINE_DCT = 0xC0,
    /* SOF1: Extended sequential DCT, Huffman coding. */
    JPEG_MARKER_SEGMENT_S0F1_START_OF_FRAME_EXT_SEQ_DCT = 0xC1,
    /* SOF2: Progressive DCT, Huffman coding. */
    JPEG_MARKER_SEGMENT_S0F2_START_OF_FRAME_PROG_DCT = 0xC2,
    /* SOF3: Lossless (sequential), Huffman coding.
     * (The identifier says DIFF_SEQ_DCT; T.81 defines 0xC3 as lossless.)
     */
    JPEG_MARKER_SEGMENT_S0F3_START_OF_FRAME_DIFF_SEQ_DCT = 0xC3,
    /* SOF5: Differential sequential DCT, Huffman coding. */
    JPEG_MARKER_SEGMENT_S0F5_START_OF_FRAME_DIFF_SEQ_DCT = 0xC5,
    /* SOF6: Differential progressive DCT, Huffman coding. */
    JPEG_MARKER_SEGMENT_S0F6_START_OF_FRAME_DIFF_PROG_DCT = 0xC6,
    /* SOF7: Differential lossless (sequential), Huffman coding. */
    JPEG_MARKER_SEGMENT_S0F7_START_OF_FRAME_DIFF_LOSSLESS_DCT = 0xC7,
    /* SOF9: Extended sequential DCT, arithmetic coding.
     * (The identifier says DIFF; T.81 defines 0xC9 as non-differential.)
     */
    JPEG_MARKER_SEGMENT_S0F9_START_OF_FRAME_DIFF_SEQ_ARITH = 0xC9,
    /* SOF10: Progressive DCT, arithmetic coding.
     * (The identifier says DIFF; T.81 defines 0xCA as non-differential.)
     */
    JPEG_MARKER_SEGMENT_S0F10_START_OF_FRAME_DIFF_PROG_ARITH = 0xCA,
    /* SOF11: Lossless (sequential), arithmetic coding.
     * (The identifier says DIFF; T.81 defines 0xCB as non-differential.)
     */
    JPEG_MARKER_SEGMENT_S0F11_START_OF_FRAME_DIFF_LOSSLESS_ARITH = 0xCB,

    /* Define Huffman Tables (DHT)
     * Variable size payload.
     * Defines one or more Huffman tables.
     */
    JPEG_MARKER_SEGMENT_DHT_DEFINE_HUFFMAN_TABLES = 0xC4,
    /* Define Arithmetic Coding Conditioning (DAC)
     * Variable size payload.
     * (The identifier carries a "DHT" prefix; the marker itself is DAC.)
     */
    JPEG_MARKER_SEGMENT_DHT_DEFINE_ARITH_CODING = 0xCC,
    /* Define Quantization Tables (DQT)
     * Variable size payload.
     * Defines one or more quantization tables.
     */
    JPEG_MARKER_SEGMENT_DQT_DEFINE_QUANTIZATION_TABLES = 0xDB,
    /* Define Restart Interval (DRI)
     * 4-byte payload.
     * Specifies the interval between RSTn markers, in Minimum Coded Units (MCUs).
     * This marker is followed by two bytes indicating the fixed size so it can be
     * treated like any other variable size segment.
     */
    JPEG_MARKER_SEGMENT_DRI_DEFINE_RESTART_INTERVAL = 0xDD,
    /* Start of Scan (SOS)
     * Variable size payload
     * This is the start of the JPEG image data, so we'll actually stop parsing
     * when we reach this.
     */
    JPEG_MARKER_SEGMENT_SOS_START_OF_SCAN = 0xDA,

    /*
     * App-specific markers E0 - EF
     * Variable size payload.
     * Since several vendors might use the *same* APPn marker type, application-
     * specific markers often begin with a standard or vendor name (e.g., "Exif" or
     * "Adobe") or some other identifying string.
     *
     * Some known app specific markers include:
     *   0xE0:
     *     - JFIF
     *   0xE1:
     *     - Exif
     *     - XMP data, starts with http://ns.adobe.com/xap/1.0/\0
     *   0xE2:
     *     - ICC Profile Chunk. There could be multiple of these to fit the entire profile, see http://www.color.org/icc_specs2.xalter and http://www.color.org/specification/ICC1v43_2010-12.pdf Section B.4
     *   0xE8:
     *     - SPIFF. Not a common format, see http://fileformats.archiveteam.org/wiki/SPIFF
     *   0xED:
     *     - IPTC / IMM metadata (a type of comment)
     *     - Photoshop data
     *   0xEE:
     *     - AdobeRGB (as opposed to sRGB)
     */
    JPEG_MARKER_SEGMENT_APP0  = 0xE0,
    JPEG_MARKER_SEGMENT_APP1  = 0xE1,
    JPEG_MARKER_SEGMENT_APP2  = 0xE2,
    JPEG_MARKER_SEGMENT_APP3  = 0xE3,
    JPEG_MARKER_SEGMENT_APP4  = 0xE4,
    JPEG_MARKER_SEGMENT_APP5  = 0xE5,
    JPEG_MARKER_SEGMENT_APP6  = 0xE6,
    JPEG_MARKER_SEGMENT_APP7  = 0xE7,
    JPEG_MARKER_SEGMENT_APP8  = 0xE8,
    JPEG_MARKER_SEGMENT_APP9  = 0xE9,
    JPEG_MARKER_SEGMENT_APP10 = 0xEA,
    JPEG_MARKER_SEGMENT_APP11 = 0xEB,
    JPEG_MARKER_SEGMENT_APP12 = 0xEC,
    JPEG_MARKER_SEGMENT_APP13 = 0xED,
    JPEG_MARKER_SEGMENT_APP14 = 0xEE,
    JPEG_MARKER_SEGMENT_APP15 = 0xEF,

    /* DTI (?)
     * Payload layout not documented here.
     */
    JPEG_MARKER_SEGMENT_DTI = 0xF1,
    /* DTT (?)
     * Payload layout not documented here.
     */
    JPEG_MARKER_SEGMENT_DTT = 0xF2,
    /* JPG7
     * Variable size payload (?)
     */
    JPEG_MARKER_SEGMENT_JPG7 = 0xF7,
    /* Comment (COM)
     * Variable size payload.
     */
    JPEG_MARKER_SEGMENT_COM_COMMENT = 0xFE,
    /* End of Image (EOI)
     * No payload
     */
    JPEG_MARKER_SEGMENT_EOI_END_OF_IMAGE = 0xD9,

    /* Entropy-encoded (aka compressed) data markers.
     *
     * These aren't referenced since we don't parse the image data.
     */
    JPEG_MARKER_NOT_A_MARKER_0x00 = 0x00,
    JPEG_MARKER_NOT_A_MARKER_0xFF = 0xFF,

    /* Reset entropy-markers are inserted every r macroblocks, where r is the restart interval set by a DRI marker.
     * Not used if there was no DRI segment-marker.
     * The low three bits of the marker code cycle in value from 0 to 7 (i.e. D0 - D7).
     */
    JPEG_MARKER_ENTROPY_RST0_RESET = 0xD0,
    JPEG_MARKER_ENTROPY_RST1_RESET = 0xD1,
    JPEG_MARKER_ENTROPY_RST2_RESET = 0xD2,
    JPEG_MARKER_ENTROPY_RST3_RESET = 0xD3,
    JPEG_MARKER_ENTROPY_RST4_RESET = 0xD4,
    JPEG_MARKER_ENTROPY_RST5_RESET = 0xD5,
    JPEG_MARKER_ENTROPY_RST6_RESET = 0xD6,
    JPEG_MARKER_ENTROPY_RST7_RESET = 0xD7,
} jpeg_marker_t;
// clang-format on
2021-01-23 16:41:41 -08:00
/*
 * Parse one Photoshop "8BIM" image resource block and, when it is a thumbnail
 * resource, scan the embedded thumbnail as a nested JPEG.
 *
 * Bytes read, starting at *off:
 *   - 4 bytes: the signature "8BIM"
 *   - 2 bytes: resource ID
 *   - 1 byte : name length, followed by the name itself; the length byte plus
 *              the name are padded to an even number of bytes
 *   - 4 bytes: big-endian size of the resource data; the data is padded to an
 *              even number of bytes
 *
 * ctx  Scan context; ctx->fmap is the map that is read.
 * off  In:  offset of the resource block within the map.
 *      Out: offset just past this resource block. Only written once the size
 *           field has been read and found to be non-zero.
 *
 * Returns:
 *   CL_BREAK  the header or size field could not be read, the signature is
 *             not "8BIM", or the resource size is zero.
 *   CL_CLEAN  the resource is not a thumbnail (ID 0x0409 / 0x040c); *off has
 *             been advanced past it.
 *   otherwise whatever cli_magic_scan_nested_fmap_type() returns for the
 *             thumbnail.
 */
static cl_error_t jpeg_check_photoshop_8bim(cli_ctx *ctx, size_t *off)
{
    const unsigned char *buf;
    uint16_t ntmp;
    uint8_t nlength, id[2];
    uint32_t size;
    size_t padded_size;
    size_t offset = *off;
    fmap_t *map   = ctx->fmap;

    /* Signature (4) + resource ID (2) + name length byte (1). */
    if (!(buf = fmap_need_off_once(map, offset, 4 + 2 + 1))) {
        cli_dbgmsg("read bim failed\n");
        return CL_BREAK;
    }

    if (memcmp(buf, "8BIM", 4) != 0) {
        cli_dbgmsg("missed 8bim\n");
        return CL_BREAK;
    }

    id[0] = (uint8_t)buf[4];
    id[1] = (uint8_t)buf[5];
    cli_dbgmsg("ID: 0x%.2x%.2x\n", id[0], id[1]);

    /* Skip the name: add one pad byte when (length byte + name) is odd. */
    nlength = buf[6];
    ntmp    = nlength + ((((uint16_t)nlength) + 1) & 0x01);
    offset += 4 + 2 + 1 + ntmp;

    if (fmap_readn(map, &size, offset, 4) != 4) {
        return CL_BREAK;
    }
    size = be32_to_host(size);
    if (size == 0) {
        return CL_BREAK;
    }

    /* Round the data size up to even. The padding is done in size_t rather
     * than uint32_t so that, where size_t is wider than 32 bits, an odd size
     * of 0xFFFFFFFF cannot wrap around to 0 and slip past the zero-size
     * rejection above. */
    padded_size = (size_t)size + (size & 0x01);

    *off = offset + 4 + padded_size;

    /* Is it a thumbnail image: 0x0409 or 0x040c */
    if (!((id[0] == 0x04) && ((id[1] == 0x09) || (id[1] == 0x0c)))) {
        /* No - the caller continues at *off, past this record */
        return CL_CLEAN;
    }

    /* Yes */
    cli_dbgmsg("found thumbnail\n");

    /* Jump past the 4-byte size field and a further 28 bytes of header
     * (NOTE(review): presumably the fixed thumbnail resource header - confirm
     * against the Photoshop file format specification). */
    offset += 4 + 28;

    /* Scan the thumbnail JPEG. A length of 0 is passed, exactly as before
     * (NOTE(review): presumably means "to the end of the map" - confirm). */
    return cli_magic_scan_nested_fmap_type(map, offset, 0, ctx, CL_TYPE_JPEG,
                                           "photoshop-thumbnail", LAYER_ATTRIBUTES_NONE);
}
cl_error_t cli_parsejpeg(cli_ctx *ctx)
2011-04-05 16:33:38 +02:00
{
cl_error_t status = CL_SUCCESS;
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
fmap_t *map = NULL;
2023-04-07 19:51:04 -07:00
jpeg_marker_t marker = JPEG_MARKER_NOT_A_MARKER_0x00, prev_marker, prev_segment = JPEG_MARKER_NOT_A_MARKER_0x00;
uint8_t buff[50]; /* 50 should be sufficient for now */
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
uint16_t len_u16;
unsigned int offset = 0, i, len, segment = 0;
bool found_comment = false;
bool found_app = false;
uint32_t num_JFIF = 0;
uint32_t num_Exif = 0;
uint32_t num_SPIFF = 0;
2011-04-05 16:33:38 +02:00
cli_dbgmsg("in cli_parsejpeg()\n");
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
if (NULL == ctx) {
cli_dbgmsg("passed context was NULL\n");
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
status = CL_EARG;
goto done;
}
libclamav: Fix scan recursion tracking Scan recursion is the process of identifying files embedded in other files and then scanning them, recursively. Internally this process is more complex than it may sound because a file may have multiple layers of types before finding a new "file". At present we treat the recursion count in the scanning context as an index into both our fmap list AND our container list. These two lists are conceptually a part of the same thing and should be unified. But what's concerning is that the "recursion level" isn't actually incremented or decremented at the same time that we add a layer to the fmap or container lists but instead is more touchy-feely, increasing when we find a new "file". To account for this shadiness, the size of the fmap and container lists has always been a little longer than our "max scan recursion" limit so we don't accidentally overflow the fmap or container arrays (!). I've implemented a single recursion-stack as an array, similar to before, which includes a pointer to each fmap at each layer, along with the size and type. Push and pop functions add and remove layers whenever a new fmap is added. A boolean argument when pushing indicates if the new layer represents a new buffer or new file (descriptor). A new buffer will reset the "nested fmap level" (described below). This commit also provides a solution for an issue where we detect embedded files more than once during scan recursion. 
For illustration, imagine a tarball named foo.tar.gz with this structure: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | └── foo.tar | TAR | 1 | 0 | | ├── bar.zip | ZIP | 2 | 1 | | │   └── hola.txt | ASCII | 3 | 0 | | └── baz.exe | PE | 2 | 1 | But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | baz.exe | PE | 0 | 0 | | ├── sfx.zip | ZIP | 1 | 1 | | │   └── hello.txt | ASCII | 2 | 0 | | └── sfx.7z | 7Z | 1 | 1 | |    └── world.txt | ASCII | 2 | 0 | (A) If we scan for embedded files at any layer, we may detect: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | ├── foo.tar | TAR | 1 | 0 | | │ ├── bar.zip | ZIP | 2 | 1 | | │ │   └── hola.txt | ASCII | 3 | 0 | | │ ├── baz.exe | PE | 2 | 1 | | │ │ ├── sfx.zip | ZIP | 3 | 1 | | │ │ │   └── hello.txt | ASCII | 4 | 0 | | │ │ └── sfx.7z | 7Z | 3 | 1 | | │ │    └── world.txt | ASCII | 4 | 0 | | │ ├── sfx.zip | ZIP | 2 | 1 | | │ │   └── hello.txt | ASCII | 3 | 0 | | │ └── sfx.7z | 7Z | 2 | 1 | | │   └── world.txt | ASCII | 3 | 0 | | ├── sfx.zip | ZIP | 1 | 1 | | └── sfx.7z | 7Z | 1 | 1 | (A) is bad because it scans content more than once. Note that for the GZ layer, it may detect the ZIP and 7Z if the signature hits on the compressed data, which it might, though extracting the ZIP and 7Z will likely fail. The reason the above doesn't happen now is that we restrict embedded type scans for a bunch of archive formats to include GZ and TAR. 
(B) If we scan for embedded files at the foo.tar layer, we may detect: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | └── foo.tar | TAR | 1 | 0 | | ├── bar.zip | ZIP | 2 | 1 | | │   └── hola.txt | ASCII | 3 | 0 | | ├── baz.exe | PE | 2 | 1 | | ├── sfx.zip | ZIP | 2 | 1 | | │   └── hello.txt | ASCII | 3 | 0 | | └── sfx.7z | 7Z | 2 | 1 | |    └── world.txt | ASCII | 3 | 0 | (B) is almost right. But we can achieve it easily enough by only scanning for embedded content in the current fmap when the "nested fmap level" is 0. The upside is that it should safely detect all embedded content, even if it may think the sfx.zip and sfx.7z are in foo.tar instead of in baz.exe. The biggest risk I can think of affects ZIPs. SFXZIP detection is identical to ZIP detection, which is why we don't allow SFXZIP to be detected if inside of a ZIP. If we only allow embedded type scanning at fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP if the bar.exe was not compressed in foo.zip and if non-compressed files extracted from ZIPs aren't extracted as new buffers: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.zip | ZIP | 0 | 0 | | └── bar.exe | PE | 1 | 1 | | └── sfx.zip | ZIP | 2 | 2 | Provided that we ensure all files extracted from zips are scanned in new buffers, option (B) should be safe. (C) If we scan for embedded files at the baz.exe layer, we may detect: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | └── foo.tar | TAR | 1 | 0 | | ├── bar.zip | ZIP | 2 | 1 | | │   └── hola.txt | ASCII | 3 | 0 | | └── baz.exe | PE | 2 | 1 | | ├── sfx.zip | ZIP | 3 | 1 | | │   └── hello.txt | ASCII | 4 | 0 | | └── sfx.7z | 7Z | 3 | 1 | |    └── world.txt | ASCII | 4 | 0 | (C) is right. 
But it's harder to achieve. For this example we can get it by restricting 7ZSFX and ZIPSFX detection only when scanning an executable. But that may mean losing detection of archives embedded elsewhere. And we'd have to identify allowable container types for each possible embedded type, which would be very difficult. So this commit aims to solve the issue the (B)-way. Note that in all situations, we still have to scan with file typing enabled to determine if we need to reassign the current file type, such as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2-compressed. Detection of DMG and a handful of other types relies on finding data partway through or near the end of a file before reassigning the entire file as the new type. Other fixes and considerations in this commit: - The utf16 HTML parser has weak error handling, particularly with respect to creating a nested fmap for scanning the ascii decoded file. This commit cleans up the error handling and wraps the nested scan with the recursion-stack push()/pop() for correct recursion tracking. Before this commit, each container layer had a flag to indicate if the container layer is valid. We need something similar so that the cli_recursion_stack_get_*() functions ignore normalized layers. Details... Imagine an LDB signature for HTML content that specifies a ZIP container. If the signature actually alerts on the normalized HTML and you don't ignore normalized layers for the container check, it will appear as though the alert is in an HTML container rather than a ZIP container. This commit accomplishes this with a boolean you set in the scan context before scanning a new layer. Then when the new fmap is created, it will use that flag to set a similar flag for the layer. The context flag is then reset so that anything after this doesn't have that flag. 
The flag allows the new recursion_stack_get() function to ignore normalized layers when iterating the stack to return a layer at a requested index, negative or positive. Scanning normalized extracted/normalized javascript and VBA should also use the 'layer is normalized' flag. - This commit also fixes the Heuristic.Broken.Executable alert for ELF files to make sure that: A) these only alert if cli_append_virus() returns CL_VIRUS (aka it respects the FP check). B) all broken-executable alerts for ELF only happen if the SCAN_HEURISTIC_BROKEN option is enabled. - This commit also cleans up the error handling in cli_magic_scan_dir(). This was needed so we could correctly apply the layer-is-normalized-flag to all VBA macros extracted to a directory when scanning the directory. - Also fix an issue where exceeding scan maximums wouldn't cause embedded file detection scans to abort. Granted we don't actually want to abort if max filesize or max recursion depth are exceeded... only if max scansize, max files, and max scantime are exceeded. Add 'abort_scan' flag to scan context, to protect against depending on correct error propagation for fatal conditions. Instead, setting this flag in the scan context should guarantee that a fatal condition deep in scan recursion isn't lost, which would result in more stuff being scanned instead of aborting. This shouldn't be necessary, but some status codes like CL_ETIMEOUT never used to be fatal and it's easier to do this than to verify every parser only returns CL_ETIMEOUT and other "fatal status codes" in fatal conditions. - Remove duplicate is_tar() prototype from filetypes.c and include is_tar.h instead. - Presently we create the fmap hash when creating the fmap. This wastes a bit of CPU if the hash is never needed. Now that we're creating fmaps for all embedded files discovered with file type recognition scans, this is a much more frequent occurrence and really slows things down. 
This commit fixes the issue by only creating fmap hashes as needed. This should not only resolve the performance impact of creating fmaps for all embedded files, but also should improve performance in general. - Add allmatch check to the zip parser after the central-header meta match. That way we don't get multiple alerts with the same match except in allmatch mode. Clean up error handling in the zip parser a tiny bit. - Fixes to ensure that the scan limits such as scansize, filesize, recursion depth, # of embedded files, and scantime are always reported if AlertExceedsMax (--alert-exceeds-max) is enabled. - Fixed an issue where non-fatal alerts for exceeding scan maximums may mask signature matches later on. I changed it so these alerts use the "possibly unwanted" alert-type and thus only alert if no other alerts were found or if all-match or heuristic-precedence are enabled. - Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata when the --gen-json feature is enabled. These will show up once under "ParseErrors" the first time a limit is exceeded. In the present implementation, only one limits-exceeded event will be added, so as to prevent a malicious or malformed sample from filling the JSON buffer with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
map = ctx->fmap;
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
if (fmap_readn(map, buff, offset, 4) != 4) {
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
goto done; /* Ignore */
}
2011-04-06 15:53:28 +02:00
if (!memcmp(buff, "\xff\xd8\xff", 3)) {
offset = 2;
} else if (!memcmp(buff, "\xff\xd9\xff\xd8", 4)) {
offset = 4;
} else {
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
goto done; /* Not a JPEG file */
}
while (1) {
segment++;
prev_marker = JPEG_MARKER_NOT_A_MARKER_0x00;
for (i = 0; offset < map->len && i < 16; i++) {
uint8_t marker_u8;
if (fmap_readn(map, &marker_u8, offset, sizeof(marker_u8)) == sizeof(marker_u8)) {
offset += sizeof(marker_u8);
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
} else {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_errmsg("JPEG: Failed to read marker, file corrupted?\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadMarker");
} else {
cli_dbgmsg("Failed to read marker, file corrupted?\n");
}
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
goto done;
}
marker = (jpeg_marker_t)marker_u8;
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
if (prev_marker == JPEG_MARKER_NOT_A_MARKER_0xFF && marker != JPEG_MARKER_NOT_A_MARKER_0xFF)
break;
prev_marker = marker;
}
if (i == 16) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Spurious bytes before segment %u\n", segment);
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SpuriousBytesBeforeSegment");
} else {
cli_dbgmsg("Spurious bytes before segment %u\n", segment);
}
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
goto done;
}
GIF, PNG bugfixes; Add AlertBrokenMedia option Added a new scan option to alert on broken media (graphics) file formats. This feature mitigates the risk of malformed media files intended to exploit vulnerabilities in other software. At present media validation exists for JPEG, TIFF, PNG, and GIF files. To enable this feature, set `AlertBrokenMedia yes` in clamd.conf, or use the `--alert-broken-media` option when using `clamscan`. These options are disabled by default for now. Application developers may enable this scan option by enabling `CL_SCAN_HEURISTIC_BROKEN_MEDIA` for the `heuristic` scan option bit field. Fixed PNG parser logic bugs that caused an excess of parsing errors and fixed a stack exhaustion issue affecting some systems when scanning PNG files. PNG file type detection was disabled via signature database update for 0.103.0 to mitigate effects from these bugs. Fixed an issue where PNG and GIF files no longer work with Target:5 (graphics) signatures if detected as CL_TYPE_PNG/GIF rather than as CL_TYPE_GRAPHICS. Target types now support up to 10 possible file types to make way for additional graphics types in future releases. Scanning JPEG, TIFF, PNG, and GIF files will no longer return "parse" errors when file format validation fails. Instead, the scan will alert with the "Heuristics.Broken.Media" signature prefix and a descriptive suffix to indicate the issue, provided that the "alert broken media" feature is enabled. GIF format validation will no longer fail if the GIF image is missing the trailer byte, as this appears to be a relatively common issue in otherwise functional GIF files. Added a TIFF dynamic configuration (DCONF) option, which was missing. This will allow us to disable TIFF format validation via signature database update in the event that it proves to be problematic. This feature already exists for many other file types. Added CL_TYPE_JPEG and CL_TYPE_TIFF types.
2020-11-04 15:49:43 -08:00
/*
* Check for MS04-028 exploit (See: https://docs.microsoft.com/en-us/security-updates/securitybulletins/2004/ms04-028)
* You can reproduce to test with https://www.exploit-db.com/exploits/474
* Checking here because the exploit PoC will fail our length check, below.
*/
if (JPEG_MARKER_SEGMENT_COM_COMMENT == marker) {
if (fmap_readn(map, buff, offset, 2) == 2) {
if (buff[0] == 0x00) {
if ((buff[1] == 0x00) || (buff[1] == 0x01)) {
/* Found exploit */
status = cli_append_potentially_unwanted(ctx, "Heuristics.Exploit.W32.MS04-028");
goto done;
}
}
}
}
if (fmap_readn(map, &len_u16, offset, sizeof(len_u16)) != sizeof(len_u16)) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_errmsg("JPEG: Failed to read the segment size, file corrupted?\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.CantReadSegmentSize");
} else {
cli_dbgmsg("Failed to read the segment size, file corrupted?\n");
}
goto done;
}
len = (unsigned int)be16_to_host(len_u16);
cli_dbgmsg("segment[%d] = 0x%02x, Length %u\n", segment, marker, len);
if (len < 2) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Invalid segment size\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.InvalidSegmentSize");
} else {
cli_dbgmsg("Invalid segment size\n");
}
goto done;
}
if (len >= map->len - offset + sizeof(len_u16)) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Segment data out of file\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SegmentDataOutOfFile");
} else {
cli_dbgmsg("Segment data out of file\n");
}
goto done;
}
offset += len;
switch (marker) {
case JPEG_MARKER_SEGMENT_APP0:
/*
* JFIF, maybe
*/
if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("JFIF") + 1) == strlen("JFIF") + 1) &&
(0 == memcmp(buff, "JFIF\0", strlen("JFIF") + 1))) {
/* Found a JFIF marker */
cli_dbgmsg(" JFIF application marker\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
if (found_app && num_JFIF > 0) {
cli_warnmsg("JPEG: Duplicate Application Marker found (JFIF)\n");
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFdupAppMarker");
goto done;
}
if (!(segment == 1 ||
(segment == 2 && found_comment) ||
(segment == 2 && num_Exif > 0) ||
(segment == 3 && found_comment && num_Exif > 0))) {
/* The JFIF segment is technically required to appear first, though it has been observed
* appearing in segment 2 in functional images when segment 1 is a comment or an Exif segment.
* If segment 1 wasn't a comment or Exif, then the file structure is unusual. */
cli_warnmsg("JPEG: JFIF marker at wrong position, found in segment # %d\n", segment);
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFmarkerBadPosition");
goto done;
}
if (len < 16) {
cli_warnmsg("JPEG: JFIF header too short\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.JFIFheaderTooShort");
goto done;
}
}
found_app = true;
num_JFIF += 1;
} else {
/* Found something else, e.g. an Ocad revision string ("Ocad$Rev: 14797 $").
   Whatever it is, we don't really care for now. */
cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
}
break;
case JPEG_MARKER_SEGMENT_APP1:
/*
* Exif, or maybe XMP data
*/
if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Exif") + 2) == strlen("Exif") + 2) &&
(0 == memcmp(buff, "Exif\0\0", strlen("Exif") + 2))) {
/* Found an Exif marker */
cli_dbgmsg(" Exif application marker\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
if (found_app && (num_Exif > 0 || num_SPIFF > 0)) {
cli_warnmsg("JPEG: Duplicate Application Marker found (Exif)\n");
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifDupAppMarker");
goto done;
}
if (segment > 3 && !found_comment && num_JFIF > 0) {
/* If Exif was found after segment 3 and previous segments weren't a comment or JFIF, something is unusual. */
cli_warnmsg("JPEG: Exif marker at wrong position\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderBadPosition");
goto done;
}
if (len < 16) {
cli_warnmsg("JPEG: Exif header too short\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.ExifHeaderTooShort");
goto done;
}
}
found_app = true;
num_Exif += 1;
} else if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("http://")) == strlen("http://")) &&
(0 == memcmp(buff, "http://", strlen("http://")))) {
cli_dbgmsg(" XMP metadata\n");
found_comment = true;
} else {
cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
}
break;
case JPEG_MARKER_SEGMENT_APP2:
/*
* ICC Profile
*/
if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("ICC_PROFILE") + 2) == strlen("ICC_PROFILE") + 2) &&
(0 == memcmp(buff, "ICC_PROFILE\0", strlen("ICC_PROFILE") + 1))) {
/* Found ICC Profile Chunk. Let's print out the chunk #, which follows "ICC_PROFILE\0"... */
uint8_t chunk_no = buff[strlen("ICC_PROFILE") + 1];
cli_dbgmsg(" ICC Profile, chunk # %d\n", chunk_no);
} else {
cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
}
break;
case JPEG_MARKER_SEGMENT_APP8:
/*
* SPIFF
*/
if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("SPIFF") + 1) == strlen("SPIFF") + 1) &&
(0 == memcmp(buff, "SPIFF\0", strlen("SPIFF") + 1))) {
/* Found SPIFF application marker */
cli_dbgmsg(" SPIFF application marker\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
if (found_app) {
cli_warnmsg("JPEG: Duplicate Application Marker found (SPIFF)\n");
cli_warnmsg("JPEG: Already observed JFIF: %d, Exif: %d, SPIFF: %d\n", num_JFIF, num_Exif, num_SPIFF);
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFdupAppMarker");
goto done;
}
if (segment != 1 && (segment != 2 || !found_comment)) {
cli_warnmsg("JPEG: SPIFF marker at wrong position\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFmarkerBadPosition");
goto done;
}
if (len < 16) {
cli_warnmsg("JPEG: SPIFF header too short\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.SPIFFheaderTooShort");
goto done;
}
}
found_app = true;
num_SPIFF += 1;
} else {
cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
}
break;
case JPEG_MARKER_SEGMENT_APP13:
/*
* Check for Photoshop information
* Example file to test with: 2c5883a964917aa54c8b3e2c70dabf0a7b06ba8c21bcbaf6f1c19501be9d9196
*/
if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Photoshop 3.0") + 1) == strlen("Photoshop 3.0") + 1) &&
(0 == memcmp(buff, "Photoshop 3.0\0", strlen("Photoshop 3.0") + 1))) {
/* Found a Photoshop file */
size_t photoshop_data_offset = offset - len + sizeof(len_u16) + strlen("Photoshop 3.0") + 1;
size_t old_offset;
cli_dbgmsg("Found Photoshop segment\n");
do {
old_offset = photoshop_data_offset;
status = jpeg_check_photoshop_8bim(ctx, &photoshop_data_offset);
if (photoshop_data_offset <= old_offset)
break;
} while (status == CL_CLEAN);
if (status == CL_BREAK) {
status = CL_CLEAN;
}
} else {
cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
}
found_comment = true;
break;
case JPEG_MARKER_SEGMENT_APP14:
/*
* Adobe RGB, probably
*/
if ((fmap_readn(map, buff, offset - len + sizeof(len_u16), strlen("Adobe") + 1) == strlen("Adobe") + 1) &&
(0 == memcmp(buff, "Adobe\0", strlen("Adobe") + 1))) {
cli_dbgmsg(" AdobeRGB application marker\n");
} else {
/* Not Adobe, dunno what this is. */
cli_dbgmsg(" Unfamiliar use of application marker: 0x%02x\n", marker);
}
break;
case JPEG_MARKER_SEGMENT_APP3:
case JPEG_MARKER_SEGMENT_APP4:
case JPEG_MARKER_SEGMENT_APP5:
case JPEG_MARKER_SEGMENT_APP6:
case JPEG_MARKER_SEGMENT_APP7:
case JPEG_MARKER_SEGMENT_APP9:
case JPEG_MARKER_SEGMENT_APP10:
case JPEG_MARKER_SEGMENT_APP11:
case JPEG_MARKER_SEGMENT_APP12:
case JPEG_MARKER_SEGMENT_APP15:
/*
* Unknown
*/
cli_dbgmsg(" Unfamiliar application marker: 0x%02x\n", marker);
break;
case JPEG_MARKER_SEGMENT_S0F0_START_OF_FRAME_BASELINE_DCT:
case JPEG_MARKER_SEGMENT_S0F1_START_OF_FRAME_EXT_SEQ_DCT:
case JPEG_MARKER_SEGMENT_S0F2_START_OF_FRAME_PROG_DCT:
case JPEG_MARKER_SEGMENT_S0F3_START_OF_FRAME_DIFF_SEQ_DCT:
case JPEG_MARKER_SEGMENT_S0F5_START_OF_FRAME_DIFF_SEQ_DCT:
case JPEG_MARKER_SEGMENT_S0F6_START_OF_FRAME_DIFF_PROG_DCT:
case JPEG_MARKER_SEGMENT_S0F7_START_OF_FRAME_DIFF_LOSSLESS_DCT:
case JPEG_MARKER_SEGMENT_S0F9_START_OF_FRAME_DIFF_SEQ_ARITH:
case JPEG_MARKER_SEGMENT_S0F10_START_OF_FRAME_DIFF_PROG_ARITH:
case JPEG_MARKER_SEGMENT_S0F11_START_OF_FRAME_DIFF_LOSSLESS_ARITH:
cli_dbgmsg(" Start of Frame (S0F) %02x\n", (uint8_t)marker);
break;
case JPEG_MARKER_SEGMENT_DHT_DEFINE_HUFFMAN_TABLES:
cli_dbgmsg(" Huffman Tables definitions (DHT)\n");
break;
case JPEG_MARKER_SEGMENT_DQT_DEFINE_QUANTIZATION_TABLES:
cli_dbgmsg(" Quantization Tables definitions (DQT)\n");
break;
case JPEG_MARKER_SEGMENT_DRI_DEFINE_RESTART_INTERVAL:
cli_dbgmsg(" Restart Interval definition (DRI)\n");
break;
case JPEG_MARKER_SEGMENT_JPG7: /* JPG7 */
cli_dbgmsg(" JPG7 segment marker\n");
if (found_app) {
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: Application Marker before JPG7\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.AppMarkerBeforeJPG7");
goto done;
}
}
goto done;
case JPEG_MARKER_SEGMENT_SOS_START_OF_SCAN: /* SOS */
cli_dbgmsg(" Start of Scan (SOS) segment marker\n");
if (!found_app) {
cli_dbgmsg(" Found the Start-of-Scan segment without identifying the JPEG application type.\n");
}
/* What follows would be scan data (compressed image data),
* parsing is not presently required for validation purposes
* so we'll just call it quits. */
goto done;
case JPEG_MARKER_SEGMENT_EOI_END_OF_IMAGE: /* EOI (End of Image) */
cli_dbgmsg(" End of Image (EOI) segment marker\n");
/*
* We shouldn't reach this marker because we exit out when we hit the Start of Scan marker.
*/
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
cli_warnmsg("JPEG: No image in jpeg\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.NoImages");
}
goto done;
case JPEG_MARKER_SEGMENT_COM_COMMENT: /* COM (comment) */
cli_dbgmsg(" Comment (COM) segment marker\n");
found_comment = true;
break;
case JPEG_MARKER_SEGMENT_DTI: /* DTI */
cli_dbgmsg(" DTI segment marker\n");
break;
case JPEG_MARKER_SEGMENT_DTT: /* DTT */
cli_dbgmsg(" DTT segment marker\n");
if (SCAN_HEURISTIC_BROKEN_MEDIA) {
if (prev_segment != JPEG_MARKER_SEGMENT_DTI) {
cli_warnmsg("JPEG: No DTI segment before DTT\n");
status = cli_append_potentially_unwanted(ctx, "Heuristics.Broken.Media.JPEG.DTTMissingDTISegment");
goto done;
}
}
break;
default:
/* Some unknown marker we don't presently handle, don't worry about it. */
break;
}
prev_segment = marker;
}
done:
return status;
}