mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 10:23:17 +00:00

A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpreting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan APIs that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted TDB-type macros and LSIG-type macros to enums for improved type safety. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. 
This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
204 lines
7 KiB
C
204 lines
7 KiB
C
/*
 * Copyright (C) 2013-2020 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 * Copyright (C) 2011-2013 Sourcefire, Inc.
 *
 * Authors: Tomasz Kojm <tkojm@clamav.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include "clamav-config.h"
|
|
#endif
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <sys/stat.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#include <time.h>
|
|
|
|
#include "jpeg.h"
|
|
#include "clamav.h"
|
|
|
|
/*
 * GETBYTE(v): read sizeof(v) bytes from the fmap into `v` at the current
 * read position and advance that position past them.
 *
 * Hidden dependencies at the point of use:
 *   - `map`    : the fmap handed to fmap_readn()
 *   - `offset` : the read position; incremented by sizeof(v) on success
 *
 * On a short read the macro logs an error and executes `return CL_EPARSE;`
 * in the ENCLOSING function, so it can only be used inside functions that
 * return a type compatible with CL_EPARSE.
 *
 * The body is wrapped in do { } while (0) so that `GETBYTE(x);` is exactly
 * one statement and stays correct inside an unbraced if/else. The argument
 * is parenthesized so any lvalue expression can be passed.
 */
#define GETBYTE(v)                                                       \
    do {                                                                 \
        if (fmap_readn(map, &(v), offset, sizeof(v)) == sizeof(v)) {     \
            offset += sizeof(v);                                         \
        } else {                                                         \
            cli_errmsg("cli_parsejpeg: Can't read file (corrupted?)\n"); \
            return CL_EPARSE;                                            \
        }                                                                \
    } while (0)
|
|
|
|
/**
 * @brief Walk the marker segments at the start of a JPEG and sanity-check them.
 *
 * The data is read from the fmap referenced by the scan context. The first
 * four bytes must match one of two signatures (FF D8 FF, or FF D9 FF D8);
 * anything else is treated as "not a JPEG" and is not an error.
 *
 * Each loop iteration locates the next marker (an FF byte followed by a
 * non-FF byte, giving up after 16 bytes), reads the 16-bit big-endian
 * segment length that follows it, verifies the segment lies inside the map,
 * and then applies marker-specific ordering/header checks. Parsing stops at
 * the first SOS or JPG7 marker, or at the first violation.
 *
 * @param ctx  Scan context; only `*ctx->fmap` is used here.
 *
 * @return CL_SUCCESS if fewer than 4 bytes could be read, if the signature
 *         does not match, if SOS is reached after an application marker was
 *         seen, or if JPG7 is reached before any application marker.
 * @return CL_EPARSE  on a read failure past the signature or on any of the
 *         structural checks below failing.
 */
cl_error_t cli_parsejpeg(cli_ctx *ctx)
{
    /* NOTE: `map` and `offset` must keep these exact names; GETBYTE uses them. */
    fmap_t *map         = *ctx->fmap;
    unsigned int offset = 0;

    unsigned char head[8];         /* scratch for signature / segment header bytes */
    unsigned char marker;          /* marker code of the current segment */
    unsigned char last_byte;       /* previous byte seen while hunting for a marker */
    unsigned char last_marker = 0; /* marker code of the previous segment */
    unsigned char len_hi;
    unsigned char len_lo;

    unsigned int skipped;          /* bytes consumed while hunting for a marker */
    unsigned int seg_len;          /* segment length field (includes its own 2 bytes) */
    unsigned int payload;          /* offset of the first byte after the length field */
    unsigned int seg_no       = 0; /* 1-based index of the current segment */
    unsigned int seen_comment = 0; /* set once a COM or IPTC segment was seen */
    unsigned int app_marker   = 0; /* last accepted application marker, 0 if none */

    cli_dbgmsg("in cli_parsejpeg()\n");

    if (fmap_readn(map, head, offset, 4) != 4) {
        return CL_SUCCESS; /* Ignore */
    }

    if (memcmp(head, "\xff\xd8\xff", 3) == 0) {
        offset = 2;
    } else if (memcmp(head, "\xff\xd9\xff\xd8", 4) == 0) {
        offset = 4;
    } else {
        return CL_SUCCESS; /* Not a JPEG file */
    }

    for (;;) {
        seg_no++;

        /* Hunt for "FF <non-FF>"; runs of FF are treated as padding. */
        last_byte = 0;
        skipped   = 0;
        while (offset < map->len && skipped < 16) {
            GETBYTE(marker);
            if (last_byte == 0xff && marker != 0xff) {
                break;
            }
            last_byte = marker;
            skipped++;
        }

        if (skipped == 16) {
            cli_warnmsg("cli_parsejpeg: Spurious bytes before segment %u\n", seg_no);
            return CL_EPARSE;
        }
        if (offset == map->len) {
            cli_warnmsg("cli_parsejpeg: Error looking for marker\n");
            return CL_EPARSE;
        }

        /* Big-endian 16-bit length; it counts the two length bytes themselves. */
        GETBYTE(len_hi);
        GETBYTE(len_lo);
        seg_len = ((unsigned int)len_hi << 8) | len_lo;
        cli_dbgmsg("JPEG: Marker %02x, length %u\n", marker, seg_len);

        if (seg_len < 2) {
            cli_warnmsg("cli_parsejpeg: Invalid segment size\n");
            return CL_EPARSE;
        }
        if (seg_len >= map->len - offset + 2) {
            cli_warnmsg("cli_parsejpeg: Segment data out of file\n");
            return CL_EPARSE;
        }

        /* Remember where this segment's data starts, then step over it. */
        payload = offset;
        offset += seg_len - 2;

        switch (marker) {
            case 0xe0: /* JFIF */
                if (app_marker != 0) {
                    cli_warnmsg("cli_parsejpeg: Duplicate Application Marker\n");
                    return CL_EPARSE;
                }
                /* Allowed only as segment 1, or as segment 2 after a comment. */
                if (!(seg_no == 1 || (seg_no == 2 && seen_comment))) {
                    cli_warnmsg("cli_parsejpeg: JFIF marker at wrong position\n");
                    return CL_EPARSE;
                }
                if (fmap_readn(map, head, payload, 5) != 5 || memcmp(head, "JFIF\0", 5) != 0) {
                    cli_warnmsg("cli_parsejpeg: No JFIF marker\n");
                    return CL_EPARSE;
                }
                if (seg_len < 16) {
                    cli_warnmsg("cli_parsejpeg: JFIF header too short\n");
                    return CL_EPARSE;
                }
                app_marker = 0xe0;
                break;

            case 0xe1: /* EXIF */
                if (fmap_readn(map, head, payload, 7) != 7) {
                    cli_warnmsg("cli_parsejpeg: Can't read Exif header\n");
                    return CL_EPARSE;
                }
                if (memcmp(head, "Exif\0\0", 6) == 0) {
                    /* Exif may follow JFIF, but no other application marker. */
                    if (app_marker != 0 && app_marker != 0xe0) {
                        cli_warnmsg("cli_parsejpeg: Duplicate Application Marker\n");
                        return CL_EPARSE;
                    }
                    if (seg_no > 3 && !seen_comment && app_marker != 0xe0) {
                        cli_warnmsg("cli_parsejpeg: Exif marker at wrong position\n");
                        return CL_EPARSE;
                    }
                } else if (memcmp(head, "http://", 7) == 0) {
                    cli_dbgmsg("JPEG: XMP data in segment %u\n", seg_no);
                } else {
                    cli_warnmsg("cli_parsejpeg: Invalid Exif header\n");
                    return CL_EPARSE;
                }
                if (seg_len < 16) {
                    cli_warnmsg("cli_parsejpeg: Exif header too short\n");
                    return CL_EPARSE;
                }
                app_marker = 0xe1;
                break;

            case 0xe8: /* SPIFF */
                if (app_marker != 0) {
                    cli_warnmsg("cli_parsejpeg: Duplicate Application Marker\n");
                    return CL_EPARSE;
                }
                /* Same placement rule as JFIF. */
                if (!(seg_no == 1 || (seg_no == 2 && seen_comment))) {
                    cli_warnmsg("cli_parsejpeg: SPIFF marker at wrong position\n");
                    return CL_EPARSE;
                }
                if (fmap_readn(map, head, payload, 6) != 6 || memcmp(head, "SPIFF\0", 6) != 0) {
                    cli_warnmsg("cli_parsejpeg: No SPIFF marker\n");
                    return CL_EPARSE;
                }
                if (seg_len < 16) {
                    cli_warnmsg("cli_parsejpeg: SPIFF header too short\n");
                    return CL_EPARSE;
                }
                app_marker = 0xe8;
                break;

            case 0xf7: /* JPG7 */
                if (app_marker != 0) {
                    cli_warnmsg("cli_parsejpeg: Application Marker before JPG7\n");
                    return CL_EPARSE;
                }
                return CL_SUCCESS;

            case 0xda: /* SOS */
                if (app_marker == 0) {
                    cli_warnmsg("cli_parsejpeg: Invalid file structure\n");
                    return CL_EPARSE;
                }
                return CL_SUCCESS;

            case 0xd9: /* EOI */
                cli_warnmsg("cli_parsejpeg: No image in jpeg\n");
                return CL_EPARSE;

            case 0xfe: /* COM */
            case 0xed: /* IPTC */
                seen_comment = 1;
                break;

            case 0xf2: /* DTT */
                if (last_marker != 0xf1) {
                    cli_warnmsg("cli_parsejpeg: No DTI segment before DTT\n");
                    return CL_EPARSE;
                }
                break;

            default:
                break;
        }

        last_marker = marker;
    }

    return CL_SUCCESS;
}
|