clamav/libclamav/filetypes.h
Val S. 0cf59435dc
Increase max embedded objects limit from 10 -> 16
By limiting the embedded file recognition in embedded files, we detect
fewer embedded files overall.

For example, imagine a PE with a structure of embedded files like so:

outer pe:
 emb. file #1: valid pe #1
 emb. file #2: valid pe #2
 emb. file #3: valid pe #3
 emb. file #4: false positive for pe
 emb. file #5: false positive for pe
 emb. file #6: false positive for pe
 emb. file #7: false positive for pe
 emb. file #8: false positive for pe
 emb. file #9: false positive for pe
 emb. file #10: false positive for pe
 emb. file #10: valid pe #4

With an embedded objects limit of 10, we won't extract that 4th valid PE
file.

However, previous we allowed detection of embedded files within embedded
files, so ClamAV mistook the above structure for something like this:

outer pe:
 emb. file #1: valid pe #1
   emb. file #1: valid pe #2
     emb. file #1: valid pe #3
       emb. file #1: false positive for pe
       emb. file #2: false positive for pe
       emb. file #3: false positive for pe
       emb. file #4: false positive for pe
       emb. file #5: false positive for pe
       emb. file #6: false positive for pe
       emb. file #7: false positive for pe
       emb. file #8: valid pe #4

As you can see, this is able to find and scan that 4th PE file without
exceeding an embedded object limit of 10.

The old way of detecting embedded files within embedded files has other
drawbacks and is obviously inaccurate in terms of the actual file
structure. But it did have that going for it.

Anyways, to improve detection, this PR bumps the embedded objects limit
to 16. I think that's okay since we've added header checks for several
types like PE's, and have also removed the need to drop embedded PE
files to a temp file for each scan.

CLAM-2897
2025-10-10 21:59:43 -04:00

181 lines
4.7 KiB
C

/*
* Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Tomasz Kojm
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef __FILETYPES_H
#define __FILETYPES_H
#include <sys/types.h>
#include "clamav.h"
#include "other_types.h"
#define CL_FILE_MBUFF_SIZE 1024
#define CL_PART_MBUFF_SIZE 1028
/* MAGIC_BUFFER_SIZE must be the bigger of the two sizes above */
#define MAGIC_BUFFER_SIZE 1028
#define CL_TYPENO 500
#define MAX_EMBEDDED_OBJ 16
typedef enum cli_file {
CL_TYPE_ANY = 0,
CL_TYPE_TEXT_ASCII = CL_TYPENO, /* X3.4, ISO-8859, non-ISO ext. ASCII */
CL_TYPE_TEXT_UTF8,
CL_TYPE_TEXT_UTF16LE,
CL_TYPE_TEXT_UTF16BE,
CL_TYPE_BINARY_DATA,
/* Please do not add any new types above this line */
CL_TYPE_ERROR,
CL_TYPE_MSEXE,
CL_TYPE_ELF,
CL_TYPE_MACHO,
CL_TYPE_MACHO_UNIBIN,
CL_TYPE_POSIX_TAR,
CL_TYPE_OLD_TAR,
CL_TYPE_CPIO_OLD,
CL_TYPE_CPIO_ODC,
CL_TYPE_CPIO_NEWC,
CL_TYPE_CPIO_CRC,
CL_TYPE_GZ,
CL_TYPE_ZIP,
CL_TYPE_BZ,
CL_TYPE_RAR,
CL_TYPE_ARJ,
CL_TYPE_MSSZDD,
CL_TYPE_MSOLE2,
CL_TYPE_MSCAB,
CL_TYPE_MSCHM,
CL_TYPE_SIS,
CL_TYPE_SCRENC,
CL_TYPE_GRAPHICS,
CL_TYPE_GIF,
CL_TYPE_PNG,
CL_TYPE_JPEG,
CL_TYPE_TIFF,
CL_TYPE_RIFF,
CL_TYPE_BINHEX,
CL_TYPE_TNEF,
CL_TYPE_CRYPTFF,
CL_TYPE_PDF,
CL_TYPE_UUENCODED,
CL_TYPE_SCRIPT,
CL_TYPE_HTML_UTF16,
CL_TYPE_RTF,
CL_TYPE_7Z,
CL_TYPE_SWF,
CL_TYPE_JAVA,
CL_TYPE_XAR,
CL_TYPE_XZ,
CL_TYPE_OOXML_WORD,
CL_TYPE_OOXML_PPT,
CL_TYPE_OOXML_XL,
CL_TYPE_INTERNAL,
CL_TYPE_HWP3,
CL_TYPE_OOXML_HWP,
CL_TYPE_PS,
CL_TYPE_EGG,
CL_TYPE_ONENOTE,
CL_TYPE_PYTHON_COMPILED,
CL_TYPE_LHA_LZH,
CL_TYPE_AI_MODEL,
/* Section for partition types */
CL_TYPE_PART_ANY, /* unknown partition type */
CL_TYPE_PART_HFSPLUS,
/* bigger numbers have higher priority (in o-t-f detection) */
CL_TYPE_MBR,
CL_TYPE_HTML, /* on the fly */
CL_TYPE_MAIL, /* magic + on the fly */
CL_TYPE_SFX, /* foo SFX marker */
CL_TYPE_ZIPSFX, /* on the fly */
CL_TYPE_RARSFX, /* on the fly */
CL_TYPE_7ZSFX,
CL_TYPE_CABSFX,
CL_TYPE_ARJSFX,
CL_TYPE_EGGSFX,
CL_TYPE_NULSFT, /* on the fly */
CL_TYPE_AUTOIT,
CL_TYPE_ISHIELD_MSI,
CL_TYPE_ISO9660,
CL_TYPE_DMG,
CL_TYPE_GPT,
CL_TYPE_APM,
CL_TYPE_XDP,
CL_TYPE_XML_WORD,
CL_TYPE_XML_XL,
CL_TYPE_XML_HWP,
CL_TYPE_HWPOLE2,
CL_TYPE_MHTML,
CL_TYPE_LNK,
CL_TYPE_UDF,
CL_TYPE_ALZ,
CL_TYPE_OTHER, /* on-the-fly, used for target 14 (OTHER) */
CL_TYPE_IGNORED /* please don't add anything below */
} cli_file_t;
struct cli_ftype {
cli_file_t type;
uint32_t offset;
unsigned char *magic;
char *tname;
struct cli_ftype *next;
uint16_t length;
};
struct cli_matched_type {
struct cli_matched_type *next;
off_t offset;
cli_file_t type;
unsigned short cnt;
};
/**
* @brief Convert a file type name to a file type code.
*
* @param name The name of the file type. E.g. "CL_TYPE_PE", "CL_TYPE_ELF", etc.
* @return cli_file_t
*/
cli_file_t cli_ftcode(const char *name);
/**
* @brief Convert a human-friendly file type name to a file type code.
*
* @param name The human-friendly name of the file type. E.g. "pe", "ZIP", "CL_TYPE_ELF", etc.
* @return cli_file_t
*/
cli_file_t cli_ftcode_human_friendly(const char *name);
/**
* @brief Convert a file type code to a file type name.
*
* @param code The file type code. E.g. CL_TYPE_PE, CL_TYPE_ELF, etc.
* @return const char* A ame of the file type. E.g. "CL_TYPE_PE", "CL_TYPE_ELF", etc.
*/
const char *cli_ftname(cli_file_t code);
void cli_ftfree(const struct cl_engine *engine);
cli_file_t cli_compare_ftm_file(const unsigned char *buf, size_t buflen, const struct cl_engine *engine);
cli_file_t cli_compare_ftm_partition(const unsigned char *buf, size_t buflen, const struct cl_engine *engine);
cli_file_t cli_determine_fmap_type(cli_ctx_t ctx, cli_file_t basetype);
int cli_addtypesigs(struct cl_engine *engine);
#endif