mirror of
https://github.com/Cisco-Talos/clamav.git
synced 2025-10-19 10:23:17 +00:00

Some checks failed
clang-format / check-16 (push) Has been cancelled
CMake Build / build-windows (push) Has been cancelled
CMake Build / build-macos (push) Has been cancelled
CodeQL / Analyze (push) Has been cancelled
CMake Build / build-ubuntu (push) Has been cancelled
clang-format / check (push) Has been cancelled
clang-format / check-1 (push) Has been cancelled
clang-format / check-2 (push) Has been cancelled
clang-format / check-3 (push) Has been cancelled
clang-format / check-4 (push) Has been cancelled
clang-format / check-5 (push) Has been cancelled
clang-format / check-6 (push) Has been cancelled
clang-format / check-7 (push) Has been cancelled
clang-format / check-8 (push) Has been cancelled
clang-format / check-9 (push) Has been cancelled
clang-format / check-10 (push) Has been cancelled
clang-format / check-11 (push) Has been cancelled
clang-format / check-12 (push) Has been cancelled
clang-format / check-13 (push) Has been cancelled
clang-format / check-14 (push) Has been cancelled
clang-format / check-15 (push) Has been cancelled
CodeQL / Analyze-1 (push) Has been cancelled
Fix the number of NULL arguments in `scanmem.c` call to `cl_scandesc_ex()`.
6674 lines
224 KiB
C
6674 lines
224 KiB
C
/*
|
|
* Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
|
|
* Copyright (C) 2007-2013 Sourcefire, Inc.
|
|
*
|
|
* Authors: Tomasz Kojm
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301, USA.
|
|
*/
|
|
|
|
#if HAVE_CONFIG_H
|
|
#include "clamav-config.h"
|
|
#endif
|
|
|
|
#ifndef _WIN32
|
|
#include <sys/time.h>
|
|
#endif
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <libgen.h>
|
|
#include <errno.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <stdbool.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#ifdef HAVE_SYS_PARAM_H
|
|
#include <sys/param.h>
|
|
#endif
|
|
#include <fcntl.h>
|
|
#include <dirent.h>
|
|
#ifdef HAVE_SYS_TIMES_H
|
|
#include <sys/times.h>
|
|
#endif
|
|
|
|
#define DCONF_ARCH ctx->dconf->archive
|
|
#define DCONF_DOC ctx->dconf->doc
|
|
#define DCONF_MAIL ctx->dconf->mail
|
|
#define DCONF_OTHER ctx->dconf->other
|
|
|
|
#include <zlib.h>
|
|
|
|
#include "clamav_rust.h"
|
|
#include "clamav.h"
|
|
#include "others.h"
|
|
#include "dconf.h"
|
|
#include "scanners.h"
|
|
#include "matcher-ac.h"
|
|
#include "matcher-bm.h"
|
|
#include "matcher.h"
|
|
#include "ole2_extract.h"
|
|
#include "vba_extract.h"
|
|
#include "xlm_extract.h"
|
|
#include "msexpand.h"
|
|
#include "mbox.h"
|
|
#include "libmspack.h"
|
|
#include "pe.h"
|
|
#include "elf.h"
|
|
#include "filetypes.h"
|
|
#include "htmlnorm.h"
|
|
#include "untar.h"
|
|
#include "special.h"
|
|
#include "binhex.h"
|
|
/* #include "uuencode.h" */
|
|
#include "tnef.h"
|
|
#include "sis.h"
|
|
#include "pdf.h"
|
|
#include "str.h"
|
|
#include "entconv.h"
|
|
#include "rtf.h"
|
|
#include "unarj.h"
|
|
#include "nsis/nulsft.h"
|
|
#include "autoit.h"
|
|
#include "textnorm.h"
|
|
#include "unzip.h"
|
|
#include "dlp.h"
|
|
#include "default.h"
|
|
#include "cpio.h"
|
|
#include "macho.h"
|
|
#include "ishield.h"
|
|
#include "7z_iface.h"
|
|
#include "fmap.h"
|
|
#include "cache.h"
|
|
#include "events.h"
|
|
#include "swf.h"
|
|
#include "jpeg.h"
|
|
#include "gif.h"
|
|
#include "png.h"
|
|
#include "iso9660.h"
|
|
#include "udf.h"
|
|
#include "dmg.h"
|
|
#include "xar.h"
|
|
#include "hfsplus.h"
|
|
#include "xz_iface.h"
|
|
#include "mbr.h"
|
|
#include "gpt.h"
|
|
#include "apm.h"
|
|
#include "ooxml.h"
|
|
#include "xdp.h"
|
|
#include "json_api.h"
|
|
#include "msxml.h"
|
|
#include "tiff.h"
|
|
#include "hwp.h"
|
|
#include "msdoc.h"
|
|
#include "execs.h"
|
|
#include "egg.h"
|
|
|
|
// libclamunrar_iface
|
|
#include "unrar_iface.h"
|
|
|
|
#include <bzlib.h>
|
|
|
|
#include <fcntl.h>
|
|
#include <string.h>
|
|
|
|
cl_error_t cli_magic_scan_dir(const char *dir, cli_ctx *ctx, uint32_t attributes)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
DIR *dd = NULL;
|
|
struct dirent *dent;
|
|
STATBUF statbuf;
|
|
char *fname = NULL;
|
|
|
|
if ((dd = opendir(dir)) != NULL) {
|
|
while ((dent = readdir(dd))) {
|
|
if (dent->d_ino) {
|
|
if (strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
|
|
/* build the full name */
|
|
fname = malloc(strlen(dir) + strlen(dent->d_name) + 2);
|
|
if (!fname) {
|
|
cli_dbgmsg("cli_magic_scan_dir: Unable to allocate memory for filename\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
sprintf(fname, "%s" PATHSEP "%s", dir, dent->d_name);
|
|
|
|
/* stat the file */
|
|
if (LSTAT(fname, &statbuf) != -1) {
|
|
if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
|
|
status = cli_magic_scan_dir(fname, ctx, attributes);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
} else {
|
|
if (S_ISREG(statbuf.st_mode)) {
|
|
status = cli_magic_scan_file(fname, ctx, dent->d_name, attributes);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
free(fname);
|
|
fname = NULL;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
cli_dbgmsg("cli_magic_scan_dir: Can't open directory %s.\n", dir);
|
|
status = CL_EOPEN;
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (NULL != dd) {
|
|
closedir(dd);
|
|
}
|
|
if (NULL != fname) {
|
|
free(fname);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/**
|
|
* @brief Scan the metadata using cli_matchmeta()
|
|
*
|
|
* @param metadata unrar metadata structure
|
|
* @param ctx scanning context structure
|
|
* @param files
|
|
* @return cl_error_t Returns CL_SUCCESS if nothing found, CL_VIRUS if something found, CL_EUNPACK if encrypted.
|
|
*/
|
|
static cl_error_t cli_unrar_scanmetadata(unrar_metadata_t *metadata, cli_ctx *ctx, unsigned int files)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
|
|
cli_dbgmsg("RAR: %s, crc32: 0x%x, encrypted: %u, compressed: %u, normal: %u, method: %u, ratio: %u\n",
|
|
metadata->filename, metadata->crc, metadata->encrypted, (unsigned int)metadata->pack_size,
|
|
(unsigned int)metadata->unpack_size, metadata->method,
|
|
metadata->pack_size ? (unsigned int)(metadata->unpack_size / metadata->pack_size) : 0);
|
|
|
|
if (CL_VIRUS == cli_matchmeta(ctx, metadata->filename, metadata->pack_size, metadata->unpack_size, metadata->encrypted, files, metadata->crc)) {
|
|
status = CL_VIRUS;
|
|
} else if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE && metadata->encrypted) {
|
|
cli_dbgmsg("RAR: Encrypted files found in archive.\n");
|
|
status = CL_EUNPACK;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_scanrar_file(const char *filepath, int desc, cli_ctx *ctx)
|
|
{
|
|
cl_error_t status = CL_EPARSE;
|
|
cl_unrar_error_t unrar_ret = UNRAR_ERR;
|
|
|
|
unsigned int file_count = 0;
|
|
|
|
uint32_t nEncryptedFilesFound = 0;
|
|
|
|
void *hArchive = NULL;
|
|
|
|
char *comment = NULL;
|
|
uint32_t comment_size = 0;
|
|
|
|
unrar_metadata_t metadata;
|
|
char *filename_base = NULL;
|
|
char *extract_fullpath = NULL;
|
|
char *comment_fullpath = NULL;
|
|
|
|
UNUSEDPARAM(desc);
|
|
|
|
if (filepath == NULL || ctx == NULL) {
|
|
cli_dbgmsg("RAR: Invalid arguments!\n");
|
|
return CL_EARG;
|
|
}
|
|
|
|
cli_dbgmsg("in scanrar()\n");
|
|
|
|
/* Zero out the metadata struct before we read the header */
|
|
memset(&metadata, 0, sizeof(unrar_metadata_t));
|
|
|
|
/*
|
|
* Open the archive.
|
|
*/
|
|
if (UNRAR_OK != (unrar_ret = cli_unrar_open(filepath, &hArchive, &comment, &comment_size, cli_debug_flag))) {
|
|
if (unrar_ret == UNRAR_ENCRYPTED) {
|
|
cli_dbgmsg("RAR: Encrypted main header\n");
|
|
status = CL_SUCCESS;
|
|
nEncryptedFilesFound += 1;
|
|
goto done;
|
|
}
|
|
if (unrar_ret == UNRAR_EMEM) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
} else if (unrar_ret == UNRAR_EOPEN) {
|
|
status = CL_EOPEN;
|
|
goto done;
|
|
} else {
|
|
status = CL_EFORMAT;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/* If the archive header had a comment, write it to the comment dir. */
|
|
if ((comment != NULL) && (comment_size > 0)) {
|
|
|
|
if (ctx->engine->keeptmp) {
|
|
int comment_fd = -1;
|
|
if (!(comment_fullpath = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "comments"))) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
comment_fd = open(comment_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
|
|
if (comment_fd < 0) {
|
|
cli_dbgmsg("RAR: ERROR: Failed to open output file\n");
|
|
} else {
|
|
cli_dbgmsg("RAR: Writing the archive comment to temp file: %s\n", comment_fullpath);
|
|
if (0 == write(comment_fd, comment, comment_size)) {
|
|
cli_dbgmsg("RAR: ERROR: Failed to write to output file\n");
|
|
}
|
|
close(comment_fd);
|
|
}
|
|
}
|
|
|
|
/* Scan the comment */
|
|
status = cli_magic_scan_buff(comment, comment_size, ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
if (status != CL_SUCCESS) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Read & scan each file header.
|
|
* Extract & scan each file.
|
|
*
|
|
* Skip files if they will exceed max filesize or max scansize.
|
|
* Count the number of encrypted file headers and encrypted files.
|
|
* - Alert if there are encrypted files,
|
|
* if the Heuristic for encrypted archives is enabled,
|
|
* and if we have not detected a signature match.
|
|
*/
|
|
do {
|
|
status = CL_SUCCESS;
|
|
|
|
/* Zero out the metadata struct before we read the header */
|
|
memset(&metadata, 0, sizeof(unrar_metadata_t));
|
|
|
|
/*
|
|
* Get the header information for the next file in the archive.
|
|
*/
|
|
unrar_ret = cli_unrar_peek_file_header(hArchive, &metadata);
|
|
if (unrar_ret != UNRAR_OK) {
|
|
if (unrar_ret == UNRAR_ENCRYPTED) {
|
|
/* Found an encrypted file header, must skip. */
|
|
cli_dbgmsg("RAR: Encrypted file header, unable to reading file metadata and file contents. Skipping file...\n");
|
|
nEncryptedFilesFound += 1;
|
|
|
|
if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else if (unrar_ret == UNRAR_BREAK) {
|
|
/* No more files. Break extraction loop. */
|
|
cli_dbgmsg("RAR: No more files in archive.\n");
|
|
break;
|
|
} else {
|
|
/* Memory error or some other error reading the header info. */
|
|
cli_dbgmsg("RAR: Error (%u) reading file header!\n", unrar_ret);
|
|
break;
|
|
}
|
|
} else {
|
|
file_count += 1;
|
|
|
|
/*
|
|
* Scan the metadata for the file in question since the content was clean, or we're running in all-match.
|
|
*/
|
|
status = cli_unrar_scanmetadata(&metadata, ctx, file_count);
|
|
if (status == CL_EUNPACK) {
|
|
nEncryptedFilesFound += 1;
|
|
} else if (status != CL_SUCCESS) {
|
|
break;
|
|
}
|
|
|
|
/* Check if we've already exceeded the scan limit */
|
|
if (cli_checklimits("RAR", ctx, 0, 0, 0))
|
|
break;
|
|
|
|
if (metadata.is_dir) {
|
|
/* Entry is a directory. Skip. */
|
|
cli_dbgmsg("RAR: Found directory. Skipping to next file.\n");
|
|
|
|
if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("RAR: Failed to skip directory. RAR archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else if (cli_checklimits("RAR", ctx, metadata.unpack_size, 0, 0)) {
|
|
/* File size exceeds maxfilesize, must skip extraction.
|
|
* Although we may be able to scan the metadata */
|
|
|
|
cli_dbgmsg("RAR: Next file is too large (%" PRIu64 " bytes); it would exceed max scansize. Skipping to next file.\n", metadata.unpack_size);
|
|
|
|
if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else if (metadata.encrypted != 0) {
|
|
/* Found an encrypted file, must skip. */
|
|
cli_dbgmsg("RAR: Encrypted file, unable to extract file contents. Skipping file...\n");
|
|
nEncryptedFilesFound += 1;
|
|
|
|
if (UNRAR_OK != cli_unrar_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("RAR: Failed to skip file. RAR archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else {
|
|
/*
|
|
* Extract the file...
|
|
*/
|
|
if (0 != metadata.filename[0]) {
|
|
(void)cli_basename(metadata.filename, strlen(metadata.filename), &filename_base, true /* posix_support_backslash_pathsep */);
|
|
}
|
|
|
|
if (!(ctx->engine->keeptmp) ||
|
|
(NULL == filename_base)) {
|
|
extract_fullpath = cli_gentemp(ctx->this_layer_tmpdir);
|
|
} else {
|
|
extract_fullpath = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, filename_base);
|
|
}
|
|
if (NULL == extract_fullpath) {
|
|
cli_dbgmsg("RAR: Memory error allocating filename for extracted file.");
|
|
status = CL_EMEM;
|
|
break;
|
|
}
|
|
cli_dbgmsg("RAR: Extracting file: %s to %s\n", metadata.filename, extract_fullpath);
|
|
|
|
unrar_ret = cli_unrar_extract_file(hArchive, extract_fullpath, NULL);
|
|
if (unrar_ret != UNRAR_OK) {
|
|
/*
|
|
* Some other error extracting the file
|
|
*/
|
|
cli_dbgmsg("RAR: Error extracting file: %s\n", metadata.filename);
|
|
|
|
/* TODO:
|
|
* may need to manually skip the file depending on what, specifically, cli_unrar_extract_file() returned.
|
|
*/
|
|
} else {
|
|
/*
|
|
* File should be extracted...
|
|
* ... make sure we have read permissions to the file.
|
|
*/
|
|
#ifdef _WIN32
|
|
if (0 != _access_s(extract_fullpath, R_OK)) {
|
|
#else
|
|
if (0 != access(extract_fullpath, R_OK)) {
|
|
#endif
|
|
cli_dbgmsg("RAR: Don't have read permissions, attempting to change file permissions to make it readable..\n");
|
|
#ifdef _WIN32
|
|
if (0 != _chmod(extract_fullpath, _S_IREAD)) {
|
|
#else
|
|
if (0 != chmod(extract_fullpath, S_IRUSR | S_IRGRP)) {
|
|
#endif
|
|
cli_dbgmsg("RAR: Failed to change permission bits so the extracted file is readable..\n");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* ... scan the extracted file.
|
|
*/
|
|
cli_dbgmsg("RAR: Extraction complete. Scanning now...\n");
|
|
status = cli_magic_scan_file(extract_fullpath, ctx, filename_base, LAYER_ATTRIBUTES_NONE);
|
|
if (status == CL_EOPEN) {
|
|
cli_dbgmsg("RAR: File not found, Extraction failed!\n");
|
|
|
|
// Don't abort the scan just because one file failed to extract.
|
|
status = CL_SUCCESS;
|
|
} else {
|
|
/* Delete the tempfile if not --leave-temps */
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(extract_fullpath)) {
|
|
cli_dbgmsg("RAR: Failed to unlink the extracted file: %s\n", extract_fullpath);
|
|
}
|
|
}
|
|
|
|
if (status != CL_SUCCESS) {
|
|
// Bail out if "virus" and also if exceeded scan maximums, etc.
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Free up that the filepath */
|
|
if (NULL != extract_fullpath) {
|
|
free(extract_fullpath);
|
|
extract_fullpath = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Free up any malloced metadata...
|
|
*/
|
|
if (NULL != filename_base) {
|
|
free(filename_base);
|
|
filename_base = NULL;
|
|
}
|
|
|
|
} while (status == CL_SUCCESS);
|
|
|
|
if (status == CL_BREAK) {
|
|
status = CL_SUCCESS;
|
|
}
|
|
|
|
done:
|
|
if (NULL != comment) {
|
|
free(comment);
|
|
comment = NULL;
|
|
}
|
|
|
|
if (NULL != comment_fullpath) {
|
|
if (!ctx->engine->keeptmp) {
|
|
cli_rmdirs(comment_fullpath);
|
|
}
|
|
free(comment_fullpath);
|
|
comment_fullpath = NULL;
|
|
}
|
|
|
|
if (NULL != hArchive) {
|
|
cli_unrar_close(hArchive);
|
|
hArchive = NULL;
|
|
}
|
|
|
|
if (NULL != filename_base) {
|
|
free(filename_base);
|
|
filename_base = NULL;
|
|
}
|
|
|
|
if (NULL != extract_fullpath) {
|
|
free(extract_fullpath);
|
|
extract_fullpath = NULL;
|
|
}
|
|
|
|
if ((CL_VIRUS != status) && (nEncryptedFilesFound > 0)) {
|
|
/* If user requests enabled the Heuristic for encrypted archives... */
|
|
if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
|
|
if (CL_VIRUS == cli_append_potentially_unwanted(ctx, "Heuristics.Encrypted.RAR")) {
|
|
status = CL_VIRUS;
|
|
}
|
|
}
|
|
}
|
|
|
|
cli_dbgmsg("RAR: Exit code: %d\n", status);
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_scanrar(cli_ctx *ctx)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
|
|
const char *filepath = NULL;
|
|
int fd = -1;
|
|
|
|
char *tmpname = NULL;
|
|
int tmpfd = -1;
|
|
|
|
#ifdef _WIN32
|
|
if ((SCAN_UNPRIVILEGED) || (NULL == ctx->fmap->path) || (0 != _access_s(ctx->fmap->path, R_OK))) {
|
|
#else
|
|
if ((SCAN_UNPRIVILEGED) || (NULL == ctx->fmap->path) || (0 != access(ctx->fmap->path, R_OK))) {
|
|
#endif
|
|
/* If map is not file-backed have to dump to file for scanrar. */
|
|
status = fmap_dump_to_file(ctx->fmap, ctx->fmap->path, ctx->this_layer_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
|
|
if (status != CL_SUCCESS) {
|
|
cli_dbgmsg("cli_magic_scan: failed to generate temporary file.\n");
|
|
goto done;
|
|
}
|
|
filepath = tmpname;
|
|
fd = tmpfd;
|
|
} else {
|
|
/* Use the original file and file descriptor. */
|
|
filepath = ctx->fmap->path;
|
|
fd = fmap_fd(ctx->fmap);
|
|
}
|
|
|
|
/* scan file */
|
|
status = cli_scanrar_file(filepath, fd, ctx);
|
|
|
|
if ((NULL == tmpname) && (CL_EOPEN == status)) {
|
|
/*
|
|
* Failed to open the file using the original filename.
|
|
* Try writing the file descriptor to a temp file and try again.
|
|
*/
|
|
status = fmap_dump_to_file(ctx->fmap, ctx->fmap->path, ctx->this_layer_tmpdir, &tmpname, &tmpfd, 0, SIZE_MAX);
|
|
if (status != CL_SUCCESS) {
|
|
cli_dbgmsg("cli_magic_scan: failed to generate temporary file.\n");
|
|
goto done;
|
|
}
|
|
filepath = tmpname;
|
|
fd = tmpfd;
|
|
|
|
/* try to scan again */
|
|
status = cli_scanrar_file(filepath, fd, ctx);
|
|
}
|
|
|
|
done:
|
|
if (tmpfd != -1) {
|
|
/* If dumped tempfile, need to cleanup */
|
|
close(tmpfd);
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tmpname)) {
|
|
status = CL_EUNLINK;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (tmpname != NULL) {
|
|
free(tmpname);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/**
|
|
* @brief Scan the metadata using cli_matchmeta()
|
|
*
|
|
* @param metadata egg metadata structure
|
|
* @param ctx scanning context structure
|
|
* @param files number of files
|
|
* @return cl_error_t Returns CL_SUCCESS if nothing found, CL_VIRUS if something found, CL_EUNPACK if encrypted.
|
|
*/
|
|
static cl_error_t cli_egg_scanmetadata(cl_egg_metadata *metadata, cli_ctx *ctx, unsigned int files)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
|
|
cli_dbgmsg("EGG: %s, encrypted: %u, compressed: %u, normal: %u, ratio: %u\n",
|
|
metadata->filename, metadata->encrypted, (unsigned int)metadata->pack_size,
|
|
(unsigned int)metadata->unpack_size,
|
|
metadata->pack_size ? (unsigned int)(metadata->unpack_size / metadata->pack_size) : 0);
|
|
|
|
if (CL_VIRUS == cli_matchmeta(ctx, metadata->filename, metadata->pack_size, metadata->unpack_size, metadata->encrypted, files, 0)) {
|
|
status = CL_VIRUS;
|
|
} else if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE && metadata->encrypted) {
|
|
cli_dbgmsg("EGG: Encrypted files found in archive.\n");
|
|
status = CL_EUNPACK;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_scanegg(cli_ctx *ctx)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t egg_ret;
|
|
|
|
unsigned int file_count = 0;
|
|
|
|
uint32_t nEncryptedFilesFound = 0;
|
|
|
|
void *hArchive = NULL;
|
|
|
|
char **comments = NULL;
|
|
uint32_t nComments = 0;
|
|
|
|
cl_egg_metadata metadata;
|
|
char *filename_base = NULL;
|
|
char *extract_fullpath = NULL;
|
|
char *comment_fullpath = NULL;
|
|
|
|
char *extract_filename = NULL;
|
|
char *extract_buffer = NULL;
|
|
size_t extract_buffer_len = 0;
|
|
|
|
if (ctx == NULL) {
|
|
cli_dbgmsg("EGG: Invalid arguments!\n");
|
|
return CL_EARG;
|
|
}
|
|
|
|
cli_dbgmsg("in scanegg()\n");
|
|
|
|
/* Zero out the metadata struct before we read the header */
|
|
memset(&metadata, 0, sizeof(cl_egg_metadata));
|
|
|
|
/*
|
|
* Open the archive.
|
|
*/
|
|
if (CL_SUCCESS != (egg_ret = cli_egg_open(ctx->fmap, &hArchive, &comments, &nComments))) {
|
|
if (egg_ret == CL_EUNPACK) {
|
|
cli_dbgmsg("EGG: Encrypted main header\n");
|
|
nEncryptedFilesFound += 1;
|
|
status = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
if (egg_ret == CL_EMEM) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
} else {
|
|
status = CL_EFORMAT;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/* If the archive header had a comment, write it to the comment dir. */
|
|
if (comments != NULL) {
|
|
uint32_t i;
|
|
for (i = 0; i < nComments; i++) {
|
|
/*
|
|
* Drop the comment to a temp file, if requested
|
|
*/
|
|
if (ctx->engine->keeptmp) {
|
|
int comment_fd = -1;
|
|
size_t prefixLen = strlen("comments_") + 5;
|
|
char *prefix = (char *)malloc(prefixLen + 1);
|
|
|
|
snprintf(prefix, prefixLen, "comments_%u", i);
|
|
prefix[prefixLen] = '\0';
|
|
|
|
if (!(comment_fullpath = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, prefix))) {
|
|
free(prefix);
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
free(prefix);
|
|
|
|
comment_fd = open(comment_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
|
|
if (comment_fd < 0) {
|
|
cli_dbgmsg("EGG: ERROR: Failed to open output file\n");
|
|
} else {
|
|
cli_dbgmsg("EGG: Writing the archive comment to temp file: %s\n", comment_fullpath);
|
|
if (0 == write(comment_fd, comments[i], nComments)) {
|
|
cli_dbgmsg("EGG: ERROR: Failed to write to output file\n");
|
|
}
|
|
close(comment_fd);
|
|
}
|
|
free(comment_fullpath);
|
|
comment_fullpath = NULL;
|
|
}
|
|
|
|
/*
|
|
* Scan the comment.
|
|
*/
|
|
status = cli_magic_scan_buff(comments[i], strlen(comments[i]), ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
if (status != CL_SUCCESS) {
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Read & scan each file header.
|
|
* Extract & scan each file.
|
|
*
|
|
* Skip files if they will exceed max filesize or max scansize.
|
|
* Count the number of encrypted file headers and encrypted files.
|
|
* - Alert if there are encrypted files,
|
|
* if the Heuristic for encrypted archives is enabled,
|
|
* and if we have not detected a signature match.
|
|
*/
|
|
do {
|
|
status = CL_SUCCESS;
|
|
|
|
/* Zero out the metadata struct before we read the header */
|
|
memset(&metadata, 0, sizeof(cl_egg_metadata));
|
|
|
|
/*
|
|
* Get the header information for the next file in the archive.
|
|
*/
|
|
egg_ret = cli_egg_peek_file_header(hArchive, &metadata);
|
|
if (egg_ret != CL_SUCCESS) {
|
|
if (egg_ret == CL_EUNPACK) {
|
|
/* Found an encrypted file header, must skip. */
|
|
cli_dbgmsg("EGG: Encrypted file header, unable to reading file metadata and file contents. Skipping file...\n");
|
|
nEncryptedFilesFound += 1;
|
|
|
|
if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else if (egg_ret == CL_BREAK) {
|
|
/* No more files. Break extraction loop. */
|
|
cli_dbgmsg("EGG: No more files in archive.\n");
|
|
break;
|
|
} else {
|
|
/* Memory error or some other error reading the header info. */
|
|
cli_dbgmsg("EGG: Error (%u) reading file header!\n", egg_ret);
|
|
break;
|
|
}
|
|
} else {
|
|
file_count += 1;
|
|
|
|
/*
|
|
* Scan the metadata for the file in question since the content was clean, or we're running in all-match.
|
|
*/
|
|
status = cli_egg_scanmetadata(&metadata, ctx, file_count);
|
|
if (status == CL_EUNPACK) {
|
|
nEncryptedFilesFound += 1;
|
|
} else if (status != CL_SUCCESS) {
|
|
break;
|
|
}
|
|
|
|
/* Check if we've already exceeded the scan limit */
|
|
if (cli_checklimits("EGG", ctx, 0, 0, 0))
|
|
break;
|
|
|
|
if (metadata.is_dir) {
|
|
/* Entry is a directory. Skip. */
|
|
cli_dbgmsg("EGG: Found directory. Skipping to next file.\n");
|
|
|
|
if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("EGG: Failed to skip directory. EGG archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else if (cli_checklimits("EGG", ctx, metadata.unpack_size, 0, 0)) {
|
|
/* File size exceeds maxfilesize, must skip extraction.
|
|
* Although we may be able to scan the metadata */
|
|
|
|
cli_dbgmsg("EGG: Next file is too large (%" PRIu64 " bytes); it would exceed max scansize. Skipping to next file.\n", metadata.unpack_size);
|
|
|
|
if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else if (metadata.encrypted != 0) {
|
|
/* Found an encrypted file, must skip. */
|
|
cli_dbgmsg("EGG: Encrypted file, unable to extract file contents. Skipping file...\n");
|
|
nEncryptedFilesFound += 1;
|
|
|
|
if (CL_SUCCESS != cli_egg_skip_file(hArchive)) {
|
|
/* Failed to skip! Break extraction loop. */
|
|
cli_dbgmsg("EGG: Failed to skip file. EGG archive extraction has failed.\n");
|
|
break;
|
|
}
|
|
} else {
|
|
/*
|
|
* Extract the file...
|
|
*/
|
|
|
|
cli_dbgmsg("EGG: Extracting file: %s\n", metadata.filename);
|
|
|
|
egg_ret = cli_egg_extract_file(hArchive, (const char **)&extract_filename, (const char **)&extract_buffer, &extract_buffer_len);
|
|
if (egg_ret != CL_SUCCESS) {
|
|
/*
|
|
* Some other error extracting the file
|
|
*/
|
|
cli_dbgmsg("EGG: Error extracting file: %s\n", metadata.filename);
|
|
} else if (!extract_buffer || 0 == extract_buffer_len) {
|
|
/*
|
|
* Empty file. Skip.
|
|
*/
|
|
cli_dbgmsg("EGG: Skipping empty file: %s\n", metadata.filename);
|
|
|
|
if (NULL != extract_filename) {
|
|
free(extract_filename);
|
|
extract_filename = NULL;
|
|
}
|
|
if (NULL != extract_buffer) {
|
|
free(extract_buffer);
|
|
extract_buffer = NULL;
|
|
}
|
|
} else {
|
|
/*
|
|
* Drop to a temp file, if requested.
|
|
*/
|
|
if (NULL != metadata.filename) {
|
|
(void)cli_basename(metadata.filename, strlen(metadata.filename), &filename_base, true /* posix_support_backslash_pathsep */);
|
|
}
|
|
|
|
if (ctx->engine->keeptmp) {
|
|
int extracted_fd = -1;
|
|
if (NULL == filename_base) {
|
|
extract_fullpath = cli_gentemp(ctx->this_layer_tmpdir);
|
|
} else {
|
|
extract_fullpath = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, filename_base);
|
|
}
|
|
if (NULL == extract_fullpath) {
|
|
cli_dbgmsg("EGG: Memory error allocating filename for extracted file.");
|
|
status = CL_EMEM;
|
|
break;
|
|
}
|
|
|
|
extracted_fd = open(extract_fullpath, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600);
|
|
if (extracted_fd < 0) {
|
|
cli_dbgmsg("EGG: ERROR: Failed to open output file\n");
|
|
} else {
|
|
cli_dbgmsg("EGG: Writing the extracted file contents to temp file: %s\n", extract_fullpath);
|
|
if (0 == write(extracted_fd, extract_buffer, extract_buffer_len)) {
|
|
cli_dbgmsg("EGG: ERROR: Failed to write to output file\n");
|
|
} else {
|
|
close(extracted_fd);
|
|
extracted_fd = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Scan the extracted file...
|
|
*/
|
|
cli_dbgmsg("EGG: Extraction complete. Scanning now...\n");
|
|
status = cli_magic_scan_buff(extract_buffer, extract_buffer_len, ctx, filename_base, LAYER_ATTRIBUTES_NONE);
|
|
if (status != CL_SUCCESS) {
|
|
goto done;
|
|
}
|
|
|
|
if (NULL != filename_base) {
|
|
free(filename_base);
|
|
filename_base = NULL;
|
|
}
|
|
if (NULL != extract_filename) {
|
|
free(extract_filename);
|
|
extract_filename = NULL;
|
|
}
|
|
if (NULL != extract_buffer) {
|
|
free(extract_buffer);
|
|
extract_buffer = NULL;
|
|
}
|
|
}
|
|
|
|
/* Free up that the filepath */
|
|
if (NULL != extract_fullpath) {
|
|
free(extract_fullpath);
|
|
extract_fullpath = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (ctx->engine->maxscansize && ctx->scansize >= ctx->engine->maxscansize) {
|
|
status = CL_SUCCESS;
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* TODO: Free up any malloced metadata...
|
|
*/
|
|
if (metadata.filename != NULL) {
|
|
free(metadata.filename);
|
|
metadata.filename = NULL;
|
|
}
|
|
|
|
} while (status == CL_SUCCESS);
|
|
|
|
if (status == CL_BREAK) {
|
|
status = CL_SUCCESS;
|
|
}
|
|
|
|
done:
|
|
|
|
if (NULL != extract_filename) {
|
|
free(extract_filename);
|
|
extract_filename = NULL;
|
|
}
|
|
|
|
if (NULL != extract_buffer) {
|
|
free(extract_buffer);
|
|
extract_buffer = NULL;
|
|
}
|
|
|
|
if (NULL != comment_fullpath) {
|
|
free(comment_fullpath);
|
|
comment_fullpath = NULL;
|
|
}
|
|
|
|
if (NULL != hArchive) {
|
|
cli_egg_close(hArchive);
|
|
hArchive = NULL;
|
|
}
|
|
|
|
if (NULL != filename_base) {
|
|
free(filename_base);
|
|
filename_base = NULL;
|
|
}
|
|
|
|
if (metadata.filename != NULL) {
|
|
free(metadata.filename);
|
|
metadata.filename = NULL;
|
|
}
|
|
|
|
if (NULL != extract_fullpath) {
|
|
free(extract_fullpath);
|
|
extract_fullpath = NULL;
|
|
}
|
|
|
|
if ((CL_VIRUS != status) && (nEncryptedFilesFound > 0)) {
|
|
/* If user requests enabled the Heuristic for encrypted archives... */
|
|
if (SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) {
|
|
if (CL_VIRUS == cli_append_potentially_unwanted(ctx, "Heuristics.Encrypted.EGG")) {
|
|
status = CL_VIRUS;
|
|
}
|
|
}
|
|
}
|
|
|
|
cli_dbgmsg("EGG: Exit code: %d\n", status);
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_scanarj(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
int file = 0;
|
|
arj_metadata_t metadata;
|
|
char *dir = NULL;
|
|
|
|
cli_dbgmsg("in cli_scanarj()\n");
|
|
|
|
memset(&metadata, 0, sizeof(arj_metadata_t));
|
|
|
|
/* generate the temporary directory */
|
|
if (!(dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "arj-tmp")))
|
|
return CL_EMEM;
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_dbgmsg("ARJ: Can't create temporary directory %s\n", dir);
|
|
free(dir);
|
|
return CL_ETMPDIR;
|
|
}
|
|
|
|
ret = cli_unarj_open(ctx->fmap, dir, &metadata);
|
|
if (ret != CL_SUCCESS) {
|
|
if (!ctx->engine->keeptmp)
|
|
cli_rmdirs(dir);
|
|
free(dir);
|
|
cli_dbgmsg("ARJ: Error: %s\n", cl_strerror(ret));
|
|
return ret;
|
|
}
|
|
|
|
do {
|
|
metadata.filename = NULL;
|
|
|
|
ret = cli_unarj_prepare_file(&metadata);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_dbgmsg("ARJ: cli_unarj_prepare_file Error: %s\n", cl_strerror(ret));
|
|
break;
|
|
}
|
|
|
|
file++;
|
|
|
|
if (CL_VIRUS == cli_matchmeta(ctx, metadata.filename, metadata.comp_size, metadata.orig_size, metadata.encrypted, file, 0)) {
|
|
cli_rmdirs(dir);
|
|
free(dir);
|
|
return CL_VIRUS;
|
|
}
|
|
|
|
if ((ret = cli_checklimits("ARJ", ctx, metadata.orig_size, metadata.comp_size, 0)) != CL_SUCCESS) {
|
|
ret = CL_SUCCESS;
|
|
if (metadata.filename)
|
|
free(metadata.filename);
|
|
continue;
|
|
}
|
|
|
|
ret = cli_unarj_extract_file(dir, &metadata);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_dbgmsg("ARJ: cli_unarj_extract_file Error: %s\n", cl_strerror(ret));
|
|
}
|
|
|
|
if (metadata.ofd >= 0) {
|
|
if (lseek(metadata.ofd, 0, SEEK_SET) == -1) {
|
|
cli_dbgmsg("ARJ: call to lseek() failed\n");
|
|
}
|
|
|
|
ret = cli_magic_scan_desc(metadata.ofd, NULL, ctx, metadata.filename, LAYER_ATTRIBUTES_NONE);
|
|
close(metadata.ofd);
|
|
if (ret != CL_SUCCESS) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (metadata.filename) {
|
|
free(metadata.filename);
|
|
metadata.filename = NULL;
|
|
}
|
|
|
|
} while (ret == CL_SUCCESS);
|
|
|
|
if (!ctx->engine->keeptmp) {
|
|
cli_rmdirs(dir);
|
|
}
|
|
|
|
if (NULL != dir) {
|
|
free(dir);
|
|
}
|
|
|
|
if (metadata.filename) {
|
|
free(metadata.filename);
|
|
}
|
|
|
|
cli_dbgmsg("ARJ: Exit code: %d\n", ret);
|
|
|
|
if (ret == CL_BREAK) {
|
|
ret = CL_SUCCESS;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scangzip_with_zib_from_the_80s(cli_ctx *ctx, unsigned char *buff)
|
|
{
|
|
int fd;
|
|
cl_error_t ret;
|
|
size_t outsize = 0;
|
|
int bytes;
|
|
fmap_t *map = ctx->fmap;
|
|
char *tmpname;
|
|
gzFile gz;
|
|
|
|
ret = fmap_fd(map);
|
|
if (ret < 0)
|
|
return CL_EDUP;
|
|
fd = dup(ret);
|
|
if (fd < 0)
|
|
return CL_EDUP;
|
|
|
|
if (!(gz = gzdopen(fd, "rb"))) {
|
|
close(fd);
|
|
return CL_EOPEN;
|
|
}
|
|
|
|
if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
|
|
cli_dbgmsg("GZip: Can't generate temporary file.\n");
|
|
gzclose(gz);
|
|
close(fd);
|
|
return ret;
|
|
}
|
|
|
|
while ((bytes = gzread(gz, buff, FILEBUFF)) > 0) {
|
|
outsize += bytes;
|
|
if (cli_checklimits("GZip", ctx, outsize, 0, 0) != CL_SUCCESS)
|
|
break;
|
|
if (cli_writen(fd, buff, (size_t)bytes) != (size_t)bytes) {
|
|
close(fd);
|
|
gzclose(gz);
|
|
if (cli_unlink(tmpname)) {
|
|
free(tmpname);
|
|
return CL_EUNLINK;
|
|
}
|
|
free(tmpname);
|
|
return CL_EWRITE;
|
|
}
|
|
}
|
|
|
|
gzclose(gz);
|
|
|
|
if (CL_SUCCESS != (ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE))) {
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp) {
|
|
(void)cli_unlink(tmpname);
|
|
}
|
|
free(tmpname);
|
|
return ret;
|
|
}
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tmpname)) {
|
|
ret = CL_EUNLINK;
|
|
}
|
|
}
|
|
free(tmpname);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scangzip(cli_ctx *ctx)
|
|
{
|
|
int fd;
|
|
cl_error_t ret = CL_SUCCESS;
|
|
unsigned char buff[FILEBUFF];
|
|
char *tmpname;
|
|
z_stream z;
|
|
size_t at = 0, outsize = 0;
|
|
fmap_t *map = ctx->fmap;
|
|
|
|
cli_dbgmsg("in cli_scangzip()\n");
|
|
|
|
memset(&z, 0, sizeof(z));
|
|
if ((ret = inflateInit2(&z, MAX_WBITS + 16)) != Z_OK) {
|
|
cli_dbgmsg("GZip: InflateInit failed: %d\n", ret);
|
|
return cli_scangzip_with_zib_from_the_80s(ctx, buff);
|
|
}
|
|
|
|
if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
|
|
cli_dbgmsg("GZip: Can't generate temporary file.\n");
|
|
inflateEnd(&z);
|
|
return ret;
|
|
}
|
|
|
|
while (at < map->len) {
|
|
unsigned int bytes = MIN(map->len - at, map->pgsz);
|
|
if (!(z.next_in = (void *)fmap_need_off_once(map, at, bytes))) {
|
|
cli_dbgmsg("GZip: Can't read %u bytes @ %lu.\n", bytes, (long unsigned)at);
|
|
inflateEnd(&z);
|
|
close(fd);
|
|
if (cli_unlink(tmpname)) {
|
|
free(tmpname);
|
|
return CL_EUNLINK;
|
|
}
|
|
free(tmpname);
|
|
return CL_EREAD;
|
|
}
|
|
at += bytes;
|
|
z.avail_in = bytes;
|
|
do {
|
|
int inf;
|
|
z.avail_out = sizeof(buff);
|
|
z.next_out = buff;
|
|
inf = inflate(&z, Z_NO_FLUSH);
|
|
if (inf != Z_OK && inf != Z_STREAM_END && inf != Z_BUF_ERROR) {
|
|
if (sizeof(buff) == z.avail_out) {
|
|
cli_dbgmsg("GZip: Bad stream, nothing in output buffer.\n");
|
|
at = map->len;
|
|
break;
|
|
} else {
|
|
cli_dbgmsg("GZip: Bad stream, data in output buffer.\n");
|
|
/* no break yet, flush extracted bytes to file */
|
|
}
|
|
}
|
|
if (cli_writen(fd, buff, sizeof(buff) - z.avail_out) == (size_t)-1) {
|
|
inflateEnd(&z);
|
|
close(fd);
|
|
if (cli_unlink(tmpname)) {
|
|
free(tmpname);
|
|
return CL_EUNLINK;
|
|
}
|
|
free(tmpname);
|
|
return CL_EWRITE;
|
|
}
|
|
outsize += sizeof(buff) - z.avail_out;
|
|
if (cli_checklimits("GZip", ctx, outsize, 0, 0) != CL_SUCCESS) {
|
|
at = map->len;
|
|
break;
|
|
}
|
|
if (inf == Z_STREAM_END) {
|
|
at -= z.avail_in;
|
|
inflateReset(&z);
|
|
break;
|
|
} else if (inf != Z_OK && inf != Z_BUF_ERROR) {
|
|
at = map->len;
|
|
break;
|
|
}
|
|
} while (z.avail_out == 0);
|
|
}
|
|
|
|
inflateEnd(&z);
|
|
|
|
if (CL_SUCCESS != (ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE))) {
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tmpname)) {
|
|
free(tmpname);
|
|
return CL_EUNLINK;
|
|
}
|
|
}
|
|
free(tmpname);
|
|
return ret;
|
|
}
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp)
|
|
if (cli_unlink(tmpname))
|
|
ret = CL_EUNLINK;
|
|
free(tmpname);
|
|
|
|
return ret;
|
|
}
|
|
|
|
#ifdef NOBZ2PREFIX
|
|
#define BZ2_bzDecompressInit bzDecompressInit
|
|
#define BZ2_bzDecompress bzDecompress
|
|
#define BZ2_bzDecompressEnd bzDecompressEnd
|
|
#endif
|
|
|
|
static cl_error_t cli_scanbzip(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
int fd, rc;
|
|
uint64_t size = 0;
|
|
char *tmpname;
|
|
bz_stream strm;
|
|
size_t off = 0;
|
|
size_t avail;
|
|
char buf[FILEBUFF];
|
|
|
|
memset(&strm, 0, sizeof(strm));
|
|
strm.next_out = buf;
|
|
strm.avail_out = sizeof(buf);
|
|
rc = BZ2_bzDecompressInit(&strm, 0, 0);
|
|
if (BZ_OK != rc) {
|
|
cli_dbgmsg("Bzip: DecompressInit failed: %d\n", rc);
|
|
return CL_EOPEN;
|
|
}
|
|
|
|
if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &fd))) {
|
|
cli_dbgmsg("Bzip: Can't generate temporary file.\n");
|
|
BZ2_bzDecompressEnd(&strm);
|
|
return ret;
|
|
}
|
|
|
|
do {
|
|
if (!strm.avail_in) {
|
|
strm.next_in = (void *)fmap_need_off_once_len(ctx->fmap, off, FILEBUFF, &avail);
|
|
strm.avail_in = avail;
|
|
off += avail;
|
|
if (!strm.avail_in) {
|
|
cli_dbgmsg("Bzip: premature end of compressed stream\n");
|
|
break;
|
|
}
|
|
}
|
|
|
|
rc = BZ2_bzDecompress(&strm);
|
|
if (BZ_OK != rc && BZ_STREAM_END != rc) {
|
|
cli_dbgmsg("Bzip: decompress error: %d\n", rc);
|
|
break;
|
|
}
|
|
|
|
if (!strm.avail_out || BZ_STREAM_END == rc) {
|
|
|
|
size += sizeof(buf) - strm.avail_out;
|
|
|
|
if (cli_writen(fd, buf, sizeof(buf) - strm.avail_out) != sizeof(buf) - strm.avail_out) {
|
|
cli_dbgmsg("Bzip: Can't write to file.\n");
|
|
BZ2_bzDecompressEnd(&strm);
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tmpname)) {
|
|
free(tmpname);
|
|
return CL_EUNLINK;
|
|
}
|
|
}
|
|
free(tmpname);
|
|
return CL_EWRITE;
|
|
}
|
|
|
|
if (cli_checklimits("Bzip", ctx, size, 0, 0) != CL_SUCCESS)
|
|
break;
|
|
|
|
strm.next_out = buf;
|
|
strm.avail_out = sizeof(buf);
|
|
}
|
|
} while (BZ_STREAM_END != rc);
|
|
|
|
BZ2_bzDecompressEnd(&strm);
|
|
|
|
if (CL_SUCCESS != (ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE))) {
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tmpname)) {
|
|
free(tmpname);
|
|
return CL_EUNLINK;
|
|
}
|
|
}
|
|
free(tmpname);
|
|
return ret;
|
|
}
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp)
|
|
if (cli_unlink(tmpname))
|
|
ret = CL_EUNLINK;
|
|
free(tmpname);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanxz(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
int fd, rc;
|
|
unsigned long int size = 0;
|
|
char *tmpname;
|
|
struct CLI_XZ strm;
|
|
size_t off = 0;
|
|
size_t avail;
|
|
unsigned char *buf;
|
|
|
|
buf = malloc(CLI_XZ_OBUF_SIZE);
|
|
if (buf == NULL) {
|
|
cli_errmsg("cli_scanxz: nomemory for decompress buffer.\n");
|
|
return CL_EMEM;
|
|
}
|
|
memset(&strm, 0x00, sizeof(struct CLI_XZ));
|
|
strm.next_out = buf;
|
|
strm.avail_out = CLI_XZ_OBUF_SIZE;
|
|
rc = cli_XzInit(&strm);
|
|
if (rc != XZ_RESULT_OK) {
|
|
cli_errmsg("cli_scanxz: DecompressInit failed: %i\n", rc);
|
|
free(buf);
|
|
return CL_EOPEN;
|
|
}
|
|
|
|
if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &fd))) {
|
|
cli_errmsg("cli_scanxz: Can't generate temporary file.\n");
|
|
cli_XzShutdown(&strm);
|
|
free(buf);
|
|
return ret;
|
|
}
|
|
cli_dbgmsg("cli_scanxz: decompressing to file %s\n", tmpname);
|
|
|
|
do {
|
|
/* set up input buffer */
|
|
if (!strm.avail_in) {
|
|
strm.next_in = (void *)fmap_need_off_once_len(ctx->fmap, off, CLI_XZ_IBUF_SIZE, &avail);
|
|
strm.avail_in = avail;
|
|
off += avail;
|
|
if (!strm.avail_in) {
|
|
cli_errmsg("cli_scanxz: premature end of compressed stream\n");
|
|
ret = CL_EFORMAT;
|
|
goto xz_exit;
|
|
}
|
|
}
|
|
|
|
/* xz decompress a chunk */
|
|
rc = cli_XzDecode(&strm);
|
|
if (XZ_RESULT_OK != rc && XZ_STREAM_END != rc) {
|
|
cli_dbgmsg("cli_scanxz: decompress error: %d\n", rc);
|
|
ret = CL_EMEM;
|
|
goto xz_exit;
|
|
}
|
|
// cli_dbgmsg("cli_scanxz: xz decompressed %li of %li available bytes\n",
|
|
// avail - strm.avail_in, avail);
|
|
|
|
/* write decompress buffer */
|
|
if (!strm.avail_out || rc == XZ_STREAM_END) {
|
|
size_t towrite = CLI_XZ_OBUF_SIZE - strm.avail_out;
|
|
size += towrite;
|
|
|
|
// cli_dbgmsg("Writing %li bytes to XZ decompress temp file(%li byte total)\n",
|
|
// towrite, size);
|
|
|
|
if (cli_writen(fd, buf, towrite) != towrite) {
|
|
cli_errmsg("cli_scanxz: Can't write to file.\n");
|
|
ret = CL_EWRITE;
|
|
goto xz_exit;
|
|
}
|
|
if (cli_checklimits("cli_scanxz", ctx, size, 0, 0) != CL_SUCCESS) {
|
|
cli_warnmsg("cli_scanxz: decompress file size exceeds limits - "
|
|
"only scanning %li bytes\n",
|
|
size);
|
|
break;
|
|
}
|
|
strm.next_out = buf;
|
|
strm.avail_out = CLI_XZ_OBUF_SIZE;
|
|
}
|
|
} while (XZ_STREAM_END != rc);
|
|
|
|
/* scan decompressed file */
|
|
ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
|
|
xz_exit:
|
|
cli_XzShutdown(&strm);
|
|
close(fd);
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tmpname) && ret == CL_SUCCESS) {
|
|
ret = CL_EUNLINK;
|
|
}
|
|
}
|
|
free(tmpname);
|
|
free(buf);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanszdd(cli_ctx *ctx)
|
|
{
|
|
int ofd;
|
|
cl_error_t ret;
|
|
char *tmpname;
|
|
|
|
cli_dbgmsg("in cli_scanszdd()\n");
|
|
|
|
if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &ofd))) {
|
|
cli_dbgmsg("MSEXPAND: Can't generate temporary file/descriptor\n");
|
|
return ret;
|
|
}
|
|
|
|
ret = cli_msexpand(ctx, ofd);
|
|
|
|
if (ret != CL_SUCCESS) { /* CL_VIRUS or some error */
|
|
close(ofd);
|
|
if (!ctx->engine->keeptmp)
|
|
if (cli_unlink(tmpname))
|
|
ret = CL_EUNLINK;
|
|
free(tmpname);
|
|
return ret;
|
|
}
|
|
|
|
cli_dbgmsg("MSEXPAND: Decompressed into %s\n", tmpname);
|
|
ret = cli_magic_scan_desc(ofd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
close(ofd);
|
|
if (!ctx->engine->keeptmp)
|
|
if (cli_unlink(tmpname))
|
|
ret = CL_EUNLINK;
|
|
free(tmpname);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t vba_scandata(const unsigned char *data, size_t len, cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
struct cli_matcher *generic_ac_root = ctx->engine->root[0];
|
|
struct cli_matcher *target_ac_root = ctx->engine->root[2];
|
|
struct cli_ac_data gmdata, tmdata;
|
|
bool gmdata_initialized = false;
|
|
bool tmdata_initialized = false;
|
|
struct cli_ac_data *mdata[2];
|
|
bool must_pop_stack = false;
|
|
|
|
cl_fmap_t *new_map = NULL;
|
|
|
|
if ((ret = cli_ac_initdata(&tmdata, target_ac_root->ac_partsigs, target_ac_root->ac_lsigs, target_ac_root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
|
|
goto done;
|
|
}
|
|
tmdata_initialized = true;
|
|
|
|
if ((ret = cli_ac_initdata(&gmdata, generic_ac_root->ac_partsigs, generic_ac_root->ac_lsigs, generic_ac_root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
|
|
goto done;
|
|
}
|
|
gmdata_initialized = true;
|
|
|
|
mdata[0] = &tmdata;
|
|
mdata[1] = &gmdata;
|
|
|
|
ret = cli_scan_buff(data, len, 0, ctx, CL_TYPE_MSOLE2, mdata);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Evaluate logical & yara rules given the new matches to see if anything alerts.
|
|
*/
|
|
new_map = fmap_open_memory(data, len, NULL);
|
|
if (new_map == NULL) {
|
|
cli_dbgmsg("Failed to create fmap for evaluating logical/yara rules after call to cli_scan_buff()\n");
|
|
ret = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_MSOLE2, true, LAYER_ATTRIBUTES_NONE); /* Perform exp_eval with child fmap */
|
|
if (CL_SUCCESS != ret) {
|
|
cli_dbgmsg("Failed to scan fmap.\n");
|
|
goto done;
|
|
}
|
|
|
|
must_pop_stack = true;
|
|
|
|
ret = cli_exp_eval(ctx, target_ac_root, &tmdata, NULL);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_exp_eval(ctx, generic_ac_root, &gmdata, NULL);
|
|
|
|
done:
|
|
|
|
if (must_pop_stack) {
|
|
(void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
|
|
}
|
|
|
|
if (NULL != new_map) {
|
|
fmap_free(new_map);
|
|
}
|
|
|
|
if (tmdata_initialized) {
|
|
cli_ac_freedata(&tmdata);
|
|
}
|
|
|
|
if (gmdata_initialized) {
|
|
cli_ac_freedata(&gmdata);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Find a file in a directory tree.
|
|
* \param filename Name of the file to find
|
|
* \param dir Directory path where to find the file
|
|
* \param A pointer to the string to store the result into
|
|
* \param Size of the string to store the result in
|
|
*/
|
|
cl_error_t find_file(const char *filename, const char *dir, char *result, size_t result_size)
|
|
{
|
|
DIR *dd;
|
|
struct dirent *dent;
|
|
char fullname[PATH_MAX];
|
|
cl_error_t ret;
|
|
size_t len;
|
|
STATBUF statbuf;
|
|
|
|
if (!result) {
|
|
return CL_ENULLARG;
|
|
}
|
|
|
|
if ((dd = opendir(dir)) != NULL) {
|
|
while ((dent = readdir(dd))) {
|
|
if (dent->d_ino) {
|
|
if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) {
|
|
|
|
snprintf(fullname, sizeof(fullname), "%s" PATHSEP "%s", dir, dent->d_name);
|
|
fullname[sizeof(fullname) - 1] = '\0';
|
|
|
|
/* stat the file */
|
|
if (LSTAT(fullname, &statbuf) != -1) {
|
|
if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
|
|
ret = find_file(filename, fullname, result, result_size);
|
|
if (ret == CL_SUCCESS) {
|
|
closedir(dd);
|
|
return ret;
|
|
}
|
|
} else if (S_ISREG(statbuf.st_mode)) {
|
|
if (strcmp(dent->d_name, filename) == 0) {
|
|
len = MIN(strlen(dir) + 1, result_size);
|
|
memcpy(result, dir, len);
|
|
result[len - 1] = '\0';
|
|
closedir(dd);
|
|
return CL_SUCCESS;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
closedir(dd);
|
|
}
|
|
|
|
return CL_EOPEN;
|
|
}
|
|
|
|
/**
|
|
* Scan an OLE directory for a VBA project.
|
|
* Contrary to cli_ole2_tempdir_scan_vba, this function uses the dir file to locate VBA modules.
|
|
*/
|
|
static cl_error_t cli_ole2_tempdir_scan_vba_new(const char *dir, cli_ctx *ctx, struct uniq *U, int *has_macros)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
uint32_t hashcnt = 0;
|
|
char *hash = NULL;
|
|
char path[PATH_MAX];
|
|
char filename[PATH_MAX];
|
|
int tempfd = -1;
|
|
char *tempfile = NULL;
|
|
|
|
if (CL_SUCCESS != (ret = uniq_get(U, "dir", 3, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: uniq_get('dir') failed with ret code (%d)!\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
while (hashcnt) {
|
|
// Find the directory containing the extracted dir file. This is complicated
|
|
// because ClamAV doesn't use the file names from the OLE file, but temporary names,
|
|
// and we have neither the complete path of the dir file in the OLE container,
|
|
// nor the mapping of the temporary directory names to their OLE names.
|
|
snprintf(filename, sizeof(filename), "%s_%u", hash, hashcnt);
|
|
filename[sizeof(filename) - 1] = '\0';
|
|
|
|
if (CL_SUCCESS == find_file(filename, dir, path, sizeof(path))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: Found dir file: %s\n", path);
|
|
if ((ret = cli_vba_readdir_new(ctx, path, U, hash, hashcnt, &tempfd, has_macros, &tempfile)) != CL_SUCCESS) {
|
|
// FIXME: Since we only know the stream name of the OLE2 stream, but not its path inside the
|
|
// OLE2 archive, we don't know if we have the right file. The only thing we can do is
|
|
// iterate all of them until one succeeds.
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: Failed to read dir from %s, trying others (error: %s (%d))\n", path, cl_strerror(ret), (int)ret);
|
|
|
|
if (tempfile) {
|
|
if (!ctx->engine->keeptmp) {
|
|
remove(tempfile);
|
|
}
|
|
free(tempfile);
|
|
tempfile = NULL;
|
|
}
|
|
|
|
ret = CL_SUCCESS;
|
|
hashcnt--;
|
|
continue;
|
|
}
|
|
|
|
if (*has_macros && SCAN_COLLECT_METADATA && (ctx->this_layer_metadata_json != NULL)) {
|
|
cli_jsonbool(ctx->this_layer_metadata_json, "HasMacros", 1);
|
|
json_object *macro_languages = cli_jsonarray(ctx->this_layer_metadata_json, "MacroLanguages");
|
|
if (macro_languages) {
|
|
cli_jsonstr(macro_languages, NULL, "VBA");
|
|
} else {
|
|
cli_dbgmsg("[cli_ole2_tempdir_scan_vba_new] Failed to add \"VBA\" entry to MacroLanguages JSON array\n");
|
|
}
|
|
}
|
|
|
|
if (SCAN_HEURISTIC_MACROS && *has_macros) {
|
|
ret = cli_append_potentially_unwanted(ctx, "Heuristics.OLE2.ContainsMacros.VBA");
|
|
if (ret == CL_VIRUS) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now rewind the extracted vba-project output FD and scan it!
|
|
*/
|
|
if (lseek(tempfd, 0, SEEK_SET) != 0) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba_new: Failed to seek to beginning of temporary VBA project file\n");
|
|
ret = CL_ESEEK;
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_scan_desc(tempfd, ctx, CL_TYPE_SCRIPT, false, NULL, AC_SCAN_VIR, NULL, "extracted-vba-project", tempfile, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
close(tempfd);
|
|
tempfd = -1;
|
|
|
|
if (tempfile) {
|
|
if (!ctx->engine->keeptmp) {
|
|
remove(tempfile);
|
|
}
|
|
free(tempfile);
|
|
tempfile = NULL;
|
|
}
|
|
}
|
|
|
|
hashcnt--;
|
|
}
|
|
|
|
done:
|
|
if (tempfd != -1) {
|
|
close(tempfd);
|
|
tempfd = -1;
|
|
}
|
|
|
|
if (tempfile) {
|
|
if (!ctx->engine->keeptmp) {
|
|
remove(tempfile);
|
|
}
|
|
free(tempfile);
|
|
tempfile = NULL;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* @brief find the summary information files and write out the meta to the JSON.
|
|
*
|
|
* @param dir The directory containing ole2 temp files
|
|
* @param ctx The scan context
|
|
* @param U The unique structure indicating while files exist in the directory
|
|
* @return cl_error_t
|
|
*/
|
|
static cl_error_t cli_ole2_tempdir_scan_summary(const char *dir, cli_ctx *ctx, struct uniq *U)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t ret;
|
|
char summary_filename[1024];
|
|
char *hash;
|
|
uint32_t hashcnt = 0;
|
|
|
|
if (CL_SUCCESS != (ret = uniq_get(U, "_5_summaryinformation", 21, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_summary: uniq_get('_5_summaryinformation') failed with ret code (%d)!\n", ret);
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
while (hashcnt) {
|
|
int fd = -1;
|
|
|
|
snprintf(summary_filename, sizeof(summary_filename), "%s" PATHSEP "%s_%u", dir, hash, hashcnt);
|
|
summary_filename[sizeof(summary_filename) - 1] = '\0';
|
|
|
|
fd = open(summary_filename, O_RDONLY | O_BINARY);
|
|
if (fd >= 0) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_summary: detected a '_5_summaryinformation' stream\n");
|
|
/* JSONOLE2 - what to do if something breaks? */
|
|
cli_ole2_summary_json(ctx, fd, 0, summary_filename);
|
|
close(fd);
|
|
}
|
|
hashcnt--;
|
|
}
|
|
|
|
if (CL_SUCCESS != (ret = uniq_get(U, "_5_documentsummaryinformation", 29, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_summary: uniq_get('_5_documentsummaryinformation') failed with ret code (%d)!\n", ret);
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
while (hashcnt) {
|
|
int fd = -1;
|
|
|
|
snprintf(summary_filename, sizeof(summary_filename), "%s" PATHSEP "%s_%u", dir, hash, hashcnt);
|
|
summary_filename[sizeof(summary_filename) - 1] = '\0';
|
|
|
|
fd = open(summary_filename, O_RDONLY | O_BINARY);
|
|
if (fd >= 0) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_summary: detected a '_5_documentsummaryinformation' stream\n");
|
|
/* JSONOLE2 - what to do if something breaks? */
|
|
cli_ole2_summary_json(ctx, fd, 1, summary_filename);
|
|
close(fd);
|
|
}
|
|
hashcnt--;
|
|
}
|
|
|
|
done:
|
|
|
|
return status;
|
|
}
|
|
|
|
/**
|
|
* @brief Check the ole2 temp directory for embedded OLE objects
|
|
*
|
|
* @param dir The ole2 temp directory
|
|
* @param ctx The scan context
|
|
* @param U The uniq structure which recors what files are in the temp directory
|
|
* @return cl_error_t
|
|
*/
|
|
static cl_error_t cli_ole2_tempdir_scan_embedded_ole10(const char *dir, cli_ctx *ctx, struct uniq *U)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t ret;
|
|
char ole10_filename[1024];
|
|
char *hash;
|
|
uint32_t hashcnt = 0;
|
|
|
|
int fd = -1;
|
|
|
|
/* Check directory for embedded OLE objects */
|
|
if (CL_SUCCESS != (ret = uniq_get(U, "_1_ole10native", 14, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_embedded_ole10: uniq_get('_1_ole10native') failed with ret code (%d)!\n", ret);
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
while (hashcnt) {
|
|
snprintf(ole10_filename, sizeof(ole10_filename), "%s" PATHSEP "%s_%u", dir, hash, hashcnt);
|
|
ole10_filename[sizeof(ole10_filename) - 1] = '\0';
|
|
|
|
fd = open(ole10_filename, O_RDONLY | O_BINARY);
|
|
if (fd < 0) {
|
|
hashcnt--;
|
|
continue;
|
|
}
|
|
|
|
ret = cli_scan_ole10(fd, ctx);
|
|
if (CL_SUCCESS != ret) {
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
|
|
hashcnt--;
|
|
}
|
|
|
|
done:
|
|
|
|
if (fd >= 0) {
|
|
close(fd);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_ole2_tempdir_scan_vba(const char *dir, cli_ctx *ctx, struct uniq *U, int *has_macros)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t ret;
|
|
int i, j;
|
|
size_t data_len;
|
|
vba_project_t *vba_project = NULL;
|
|
char *fullname = NULL;
|
|
char vbaname[1024];
|
|
unsigned char *data = NULL;
|
|
char *hash;
|
|
uint32_t hashcnt = 0;
|
|
|
|
int fd = -1;
|
|
|
|
int proj_contents_fd = -1;
|
|
char *proj_contents_fname = NULL;
|
|
|
|
if (CL_SUCCESS != (status = uniq_get(U, "_vba_project", 12, NULL, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: uniq_get('_vba_project') failed with ret code (%d)!\n", status);
|
|
goto done;
|
|
}
|
|
while (hashcnt) {
|
|
if (!(vba_project = (vba_project_t *)cli_vba_readdir(dir, U, hashcnt))) {
|
|
hashcnt--;
|
|
continue;
|
|
}
|
|
|
|
for (i = 0; i < vba_project->count; i++) {
|
|
for (j = 1; (unsigned int)j <= vba_project->colls[i]; j++) {
|
|
snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", vba_project->dir, vba_project->name[i], j);
|
|
vbaname[sizeof(vbaname) - 1] = '\0';
|
|
|
|
fd = open(vbaname, O_RDONLY | O_BINARY);
|
|
if (fd == -1) {
|
|
continue;
|
|
}
|
|
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: Decompress VBA project '%s_%u'\n", vba_project->name[i], j);
|
|
|
|
data = (unsigned char *)cli_vba_inflate(fd, vba_project->offset[i], &data_len);
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
|
|
*has_macros = *has_macros + 1;
|
|
|
|
if (NULL != data) {
|
|
/* cli_dbgmsg("Project content:\n%s", data); */
|
|
if (ctx->scanned)
|
|
*ctx->scanned += data_len;
|
|
if (ctx->engine->keeptmp) {
|
|
if (CL_SUCCESS != (status = cli_gentempfd(ctx->this_layer_tmpdir, &proj_contents_fname, &proj_contents_fd))) {
|
|
cli_warnmsg("WARNING: VBA project '%s_%u' cannot be dumped to file\n", vba_project->name[i], j);
|
|
goto done;
|
|
}
|
|
|
|
if (cli_writen(proj_contents_fd, data, data_len) != data_len) {
|
|
cli_warnmsg("WARNING: VBA project '%s_%u' failed to write to file\n", vba_project->name[i], j);
|
|
status = CL_EWRITE;
|
|
goto done;
|
|
}
|
|
|
|
close(proj_contents_fd);
|
|
proj_contents_fd = -1;
|
|
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: VBA project '%s_%u' dumped to %s\n", vba_project->name[i], j, proj_contents_fname);
|
|
|
|
free(proj_contents_fname);
|
|
proj_contents_fname = NULL;
|
|
}
|
|
|
|
status = vba_scandata(data, data_len, ctx);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
free(data);
|
|
data = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
cli_free_vba_project(vba_project);
|
|
vba_project = NULL;
|
|
|
|
hashcnt--;
|
|
}
|
|
|
|
if (CL_SUCCESS != (status = uniq_get(U, "powerpoint document", 19, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: uniq_get('powerpoint document') failed with ret code (%d)!\n", status);
|
|
goto done;
|
|
}
|
|
while (hashcnt) {
|
|
snprintf(vbaname, 1024, "%s" PATHSEP "%s_%u", dir, hash, hashcnt);
|
|
vbaname[sizeof(vbaname) - 1] = '\0';
|
|
|
|
fd = open(vbaname, O_RDONLY | O_BINARY);
|
|
if (fd == -1) {
|
|
hashcnt--;
|
|
continue;
|
|
}
|
|
|
|
fullname = cli_ppt_vba_read(fd, ctx);
|
|
if (NULL != fullname) {
|
|
status = cli_magic_scan_dir(fullname, ctx, LAYER_ATTRIBUTES_NONE);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
if (!ctx->engine->keeptmp) {
|
|
cli_rmdirs(fullname);
|
|
}
|
|
free(fullname);
|
|
fullname = NULL;
|
|
}
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
|
|
hashcnt--;
|
|
}
|
|
|
|
if (CL_SUCCESS != (status = uniq_get(U, "worddocument", 12, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: uniq_get('worddocument') failed with ret code (%d)!\n", status);
|
|
goto done;
|
|
}
|
|
while (hashcnt) {
|
|
snprintf(vbaname, sizeof(vbaname), "%s" PATHSEP "%s_%u", dir, hash, hashcnt);
|
|
vbaname[sizeof(vbaname) - 1] = '\0';
|
|
|
|
fd = open(vbaname, O_RDONLY | O_BINARY);
|
|
if (fd == -1) {
|
|
hashcnt--;
|
|
continue;
|
|
}
|
|
|
|
if (!(vba_project = (vba_project_t *)cli_wm_readdir(fd))) {
|
|
close(fd);
|
|
fd = -1;
|
|
hashcnt--;
|
|
continue;
|
|
}
|
|
|
|
for (i = 0; i < vba_project->count; i++) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: Decompress WM project macro:%d key:%d length:%d\n", i, vba_project->key[i], vba_project->length[i]);
|
|
|
|
data = (unsigned char *)cli_wm_decrypt_macro(fd, vba_project->offset[i], vba_project->length[i], vba_project->key[i]);
|
|
if (!data) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: WARNING: WM project '%s' macro %d decrypted to NULL\n", vba_project->name[i], i);
|
|
} else {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: Project content:\n%s", data);
|
|
|
|
if (ctx->scanned) {
|
|
*ctx->scanned += vba_project->length[i];
|
|
}
|
|
|
|
status = vba_scandata(data, vba_project->length[i], ctx);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
free(data);
|
|
data = NULL;
|
|
}
|
|
}
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
|
|
cli_free_vba_project(vba_project);
|
|
vba_project = NULL;
|
|
|
|
hashcnt--;
|
|
}
|
|
|
|
done:
|
|
|
|
if (*has_macros) {
|
|
if (SCAN_COLLECT_METADATA && (ctx->this_layer_metadata_json != NULL)) {
|
|
cli_jsonbool(ctx->this_layer_metadata_json, "HasMacros", 1);
|
|
json_object *macro_languages = cli_jsonarray(ctx->this_layer_metadata_json, "MacroLanguages");
|
|
if (macro_languages) {
|
|
cli_jsonstr(macro_languages, NULL, "VBA");
|
|
} else {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: Failed to add \"VBA\" entry to MacroLanguages JSON array\n");
|
|
}
|
|
}
|
|
|
|
if (SCAN_HEURISTIC_MACROS) {
|
|
ret = cli_append_potentially_unwanted(ctx, "Heuristics.OLE2.ContainsMacros.VBA");
|
|
if (ret == CL_VIRUS) {
|
|
status = ret;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (proj_contents_fd >= 0) {
|
|
close(proj_contents_fd);
|
|
}
|
|
if (NULL != proj_contents_fname) {
|
|
free(proj_contents_fname);
|
|
}
|
|
|
|
if (NULL != vba_project) {
|
|
cli_free_vba_project(vba_project);
|
|
}
|
|
|
|
if (NULL != data) {
|
|
free(data);
|
|
}
|
|
|
|
if (NULL != fullname) {
|
|
if (!ctx->engine->keeptmp) {
|
|
(void)cli_rmdirs(fullname);
|
|
}
|
|
|
|
free(fullname);
|
|
}
|
|
|
|
if (fd >= 0) {
|
|
close(fd);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_ole2_tempdir_scan_for_xlm_and_images(const char *dir, cli_ctx *ctx, struct uniq *U)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
char *hash = NULL;
|
|
uint32_t hashcnt = 0;
|
|
char STR_WORKBOOK[] = "workbook";
|
|
char STR_BOOK[] = "book";
|
|
|
|
if (CL_SUCCESS != (ret = uniq_get(U, STR_WORKBOOK, sizeof(STR_WORKBOOK) - 1, &hash, &hashcnt))) {
|
|
if (CL_SUCCESS != (ret = uniq_get(U, STR_BOOK, sizeof(STR_BOOK) - 1, &hash, &hashcnt))) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_for_xlm_and_images: uniq_get('%s') failed with ret code (%d)!\n", STR_BOOK, ret);
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
for (; hashcnt > 0; hashcnt--) {
|
|
if (CL_SUCCESS != (ret = cli_extract_xlm_macros_and_images(dir, ctx, hash, hashcnt))) {
|
|
switch (ret) {
|
|
case CL_VIRUS:
|
|
case CL_EMEM:
|
|
goto done;
|
|
default:
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_for_xlm_and_images: An error occurred when parsing XLM BIFF temp file, skipping to next file.\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
done:
|
|
return ret;
|
|
}
|
|
|
|
const char *const HTML_URIS_JSON_KEY = "URIs";
|
|
/* https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml */
|
|
const char *URI_LIST[] = {
|
|
"aaa://",
|
|
"aaas://",
|
|
"about://",
|
|
"acap://",
|
|
"acct://",
|
|
"acd://",
|
|
"acr://",
|
|
"adiumxtra://",
|
|
"adt://",
|
|
"afp://",
|
|
"afs://",
|
|
"aim://",
|
|
"amss://",
|
|
"android://",
|
|
"appdata://",
|
|
"apt://",
|
|
"ar://",
|
|
"ark://",
|
|
"at://",
|
|
"attachment://",
|
|
"aw://",
|
|
"barion://",
|
|
"bb://",
|
|
"beshare://",
|
|
"bitcoin://",
|
|
"bitcoincash://",
|
|
"blob://",
|
|
"bolo://",
|
|
"brid://",
|
|
"browserext://",
|
|
"cabal://",
|
|
"calculator://",
|
|
"callto://",
|
|
"cap://",
|
|
"cast://",
|
|
"casts://",
|
|
"chrome://",
|
|
"chrome-extension://",
|
|
"cid://",
|
|
"coap://",
|
|
"coap+tcp://",
|
|
"coap+ws://",
|
|
"coaps://",
|
|
"coaps+tcp://",
|
|
"coaps+ws://",
|
|
"com-eventbrite-attendee://",
|
|
"content://",
|
|
"content-type://",
|
|
"crid://",
|
|
"cstr://",
|
|
"cvs://",
|
|
"dab://",
|
|
"dat://",
|
|
"data://",
|
|
"dav://",
|
|
"dhttp://",
|
|
"diaspora://",
|
|
"dict://",
|
|
"did://",
|
|
"dis://",
|
|
"dlna-playcontainer://",
|
|
"dlna-playsingle://",
|
|
"dns://",
|
|
"dntp://",
|
|
"doi://",
|
|
"dpp://",
|
|
"drm://",
|
|
"drop://",
|
|
"dtmi://",
|
|
"dtn://",
|
|
"dvb://",
|
|
"dvx://",
|
|
"dweb://",
|
|
"ed2k://",
|
|
"eid://",
|
|
"elsi://",
|
|
"embedded://",
|
|
"ens://",
|
|
"ethereum://",
|
|
"example://",
|
|
"facetime://",
|
|
"fax://",
|
|
"feed://",
|
|
"feedready://",
|
|
"fido://",
|
|
"file://",
|
|
"filesystem://",
|
|
"finger://",
|
|
"first-run-pen-experience://",
|
|
"fish://",
|
|
"fm://",
|
|
"ftp://",
|
|
"fuchsia-pkg://",
|
|
"geo://",
|
|
"gg://",
|
|
"git://",
|
|
"gitoid://",
|
|
"gizmoproject://",
|
|
"go://",
|
|
"gopher://",
|
|
"graph://",
|
|
"grd://",
|
|
"gtalk://",
|
|
"h323://",
|
|
"ham://",
|
|
"hcap://",
|
|
"hcp://",
|
|
"hs20://",
|
|
"http://",
|
|
"https://",
|
|
"hxxp://",
|
|
"hxxps://",
|
|
"hydrazone://",
|
|
"hyper://",
|
|
"iax://",
|
|
"icap://",
|
|
"icon://",
|
|
"im://",
|
|
"imap://",
|
|
"info://",
|
|
"iotdisco://",
|
|
"ipfs://",
|
|
"ipn://",
|
|
"ipns://",
|
|
"ipp://",
|
|
"ipps://",
|
|
"irc://",
|
|
"irc6://",
|
|
"ircs://",
|
|
"iris://",
|
|
"iris.beep://",
|
|
"iris.lwz://",
|
|
"iris.xpc://",
|
|
"iris.xpcs://",
|
|
"isostore://",
|
|
"itms://",
|
|
"jabber://",
|
|
"jar://",
|
|
"jms://",
|
|
"keyparc://",
|
|
"lastfm://",
|
|
"lbry://",
|
|
"ldap://",
|
|
"ldaps://",
|
|
"leaptofrogans://",
|
|
"lid://",
|
|
"lorawan://",
|
|
"lpa://",
|
|
"lvlt://",
|
|
"machineProvisioningProgressReporter://",
|
|
"magnet://",
|
|
"mailserver://",
|
|
"mailto://",
|
|
"maps://",
|
|
"market://",
|
|
"matrix://",
|
|
"message://",
|
|
"microsoft.windows.camera://",
|
|
"microsoft.windows.camera.multipicker://",
|
|
"microsoft.windows.camera.picker://",
|
|
"mid://",
|
|
"mms://",
|
|
"modem://",
|
|
"mongodb://",
|
|
"moz://",
|
|
"ms-access://",
|
|
"ms-appinstaller://",
|
|
"ms-browser-extension://",
|
|
"ms-calculator://",
|
|
"ms-drive-to://",
|
|
"ms-enrollment://",
|
|
"ms-excel://",
|
|
"ms-eyecontrolspeech://",
|
|
"ms-gamebarservices://",
|
|
"ms-gamingoverlay://",
|
|
"ms-getoffice://",
|
|
"ms-help://",
|
|
"ms-infopath://",
|
|
"ms-inputapp://",
|
|
"ms-launchremotedesktop://",
|
|
"ms-lockscreencomponent-config://",
|
|
"ms-media-stream-id://",
|
|
"ms-meetnow://",
|
|
"ms-mixedrealitycapture://",
|
|
"ms-mobileplans://",
|
|
"ms-newsandinterests://",
|
|
"ms-officeapp://",
|
|
"ms-people://",
|
|
"ms-project://",
|
|
"ms-powerpoint://",
|
|
"ms-publisher://",
|
|
"ms-recall://",
|
|
"ms-remotedesktop://",
|
|
"ms-remotedesktop-launch://",
|
|
"ms-restoretabcompanion://",
|
|
"ms-screenclip://",
|
|
"ms-screensketch://",
|
|
"ms-search://",
|
|
"ms-search-repair://",
|
|
"ms-secondary-screen-controller://",
|
|
"ms-secondary-screen-setup://",
|
|
"ms-settings://",
|
|
"ms-settings-airplanemode://",
|
|
"ms-settings-bluetooth://",
|
|
"ms-settings-camera://",
|
|
"ms-settings-cellular://",
|
|
"ms-settings-cloudstorage://",
|
|
"ms-settings-connectabledevices://",
|
|
"ms-settings-displays-topology://",
|
|
"ms-settings-emailandaccounts://",
|
|
"ms-settings-language://",
|
|
"ms-settings-location://",
|
|
"ms-settings-lock://",
|
|
"ms-settings-nfctransactions://",
|
|
"ms-settings-notifications://",
|
|
"ms-settings-power://",
|
|
"ms-settings-privacy://",
|
|
"ms-settings-proximity://",
|
|
"ms-settings-screenrotation://",
|
|
"ms-settings-wifi://",
|
|
"ms-settings-workplace://",
|
|
"ms-spd://",
|
|
"ms-stickers://",
|
|
"ms-sttoverlay://",
|
|
"ms-transit-to://",
|
|
"ms-useractivityset://",
|
|
"ms-virtualtouchpad://",
|
|
"ms-visio://",
|
|
"ms-walk-to://",
|
|
"ms-whiteboard://",
|
|
"ms-whiteboard-cmd://",
|
|
"ms-word://",
|
|
"msnim://",
|
|
"msrp://",
|
|
"msrps://",
|
|
"mss://",
|
|
"mt://",
|
|
"mtqp://",
|
|
"mumble://",
|
|
"mupdate://",
|
|
"mvn://",
|
|
"mvrp://",
|
|
"mvrps://",
|
|
"news://",
|
|
"nfs://",
|
|
"ni://",
|
|
"nih://",
|
|
"nntp://",
|
|
"notes://",
|
|
"num://",
|
|
"ocf://",
|
|
"oid://",
|
|
"onenote://",
|
|
"onenote-cmd://",
|
|
"opaquelocktoken://",
|
|
"openid://",
|
|
"openpgp4fpr://",
|
|
"otpauth://",
|
|
"p1://",
|
|
"pack://",
|
|
"palm://",
|
|
"paparazzi://",
|
|
"payment://",
|
|
"payto://",
|
|
"pkcs11://",
|
|
"platform://",
|
|
"pop://",
|
|
"pres://",
|
|
"prospero://",
|
|
"proxy://",
|
|
"pwid://",
|
|
"psyc://",
|
|
"pttp://",
|
|
"qb://",
|
|
"query://",
|
|
"quic-transport://",
|
|
"redis://",
|
|
"rediss://",
|
|
"reload://",
|
|
"res://",
|
|
"resource://",
|
|
"rmi://",
|
|
"rsync://",
|
|
"rtmfp://",
|
|
"rtmp://",
|
|
"rtsp://",
|
|
"rtsps://",
|
|
"rtspu://",
|
|
"sarif://",
|
|
"secondlife://",
|
|
"secret-token://",
|
|
"service://",
|
|
"session://",
|
|
"sftp://",
|
|
"sgn://",
|
|
"shc://",
|
|
"shttp://",
|
|
"sieve://",
|
|
"simpleledger://",
|
|
"simplex://",
|
|
"sip://",
|
|
"sips://",
|
|
"skype://",
|
|
"smb://",
|
|
"smp://",
|
|
"sms://",
|
|
"smtp://",
|
|
"snews://",
|
|
"snmp://",
|
|
"soap.beep://",
|
|
"soap.beeps://",
|
|
"soldat://",
|
|
"spiffe://",
|
|
"spotify://",
|
|
"ssb://",
|
|
"ssh://",
|
|
"starknet://",
|
|
"steam://",
|
|
"stun://",
|
|
"stuns://",
|
|
"submit://",
|
|
"svn://",
|
|
"swh://",
|
|
"swid://",
|
|
"swidpath://",
|
|
"tag://",
|
|
"taler://",
|
|
"teamspeak://",
|
|
"tel://",
|
|
"teliaeid://",
|
|
"telnet://",
|
|
"tftp://",
|
|
"things://",
|
|
"thismessage://",
|
|
"tip://",
|
|
"tn3270://",
|
|
"tool://",
|
|
"turn://",
|
|
"turns://",
|
|
"tv://",
|
|
"udp://",
|
|
"unreal://",
|
|
"upt://",
|
|
"urn://",
|
|
"ut2004://",
|
|
"uuid-in-package://",
|
|
"v-event://",
|
|
"vemmi://",
|
|
"ventrilo://",
|
|
"ves://",
|
|
"videotex://",
|
|
"vnc://",
|
|
"view-source://",
|
|
"vscode://",
|
|
"vscode-insiders://",
|
|
"vsls://",
|
|
"w3://",
|
|
"wais://",
|
|
"web3://",
|
|
"wcr://",
|
|
"webcal://",
|
|
"web+ap://",
|
|
"wifi://",
|
|
"wpid://",
|
|
"ws://",
|
|
"wss://",
|
|
"wtai://",
|
|
"wyciwyg://",
|
|
"xcon://",
|
|
"xcon-userid://",
|
|
"xfire://",
|
|
"xmlrpc.beep://",
|
|
"xmlrpc.beeps://",
|
|
"xmpp://",
|
|
"xftp://",
|
|
"xrcp://",
|
|
"xri://",
|
|
"ymsgr://",
|
|
"z39.50://",
|
|
"z39.50r://",
|
|
"z39.50s://"};
|
|
|
|
static bool is_url(const char *const str, size_t str_len)
|
|
{
|
|
bool bRet = false;
|
|
size_t i;
|
|
|
|
for (i = 0; i < sizeof(URI_LIST) / sizeof(URI_LIST[0]); i++) {
|
|
if (str && (str_len > strlen(URI_LIST[i])) && (0 == strncasecmp(str, URI_LIST[i], strlen(URI_LIST[i])))) {
|
|
bRet = true;
|
|
goto done;
|
|
}
|
|
}
|
|
done:
|
|
return bRet;
|
|
}
|
|
|
|
static void save_urls(cli_ctx *ctx, tag_arguments_t *hrefs, form_data_t *form_data)
|
|
{
|
|
int i = 0;
|
|
json_object *ary = NULL;
|
|
|
|
if (NULL == hrefs) {
|
|
return;
|
|
}
|
|
|
|
if (!(SCAN_STORE_HTML_URIS && SCAN_COLLECT_METADATA && (ctx->this_layer_metadata_json != NULL))) {
|
|
return;
|
|
}
|
|
|
|
/*Add hrefs*/
|
|
for (i = 0; i < hrefs->count; i++) {
|
|
if (is_url((const char *)hrefs->value[i], strlen((const char *)hrefs->value[i]))) {
|
|
if (NULL == ary) {
|
|
ary = cli_jsonarray(ctx->this_layer_metadata_json, HTML_URIS_JSON_KEY);
|
|
if (!ary) {
|
|
cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URIS_JSON_KEY);
|
|
return;
|
|
}
|
|
}
|
|
cli_jsonstr(ary, NULL, (const char *)hrefs->value[i]);
|
|
}
|
|
}
|
|
|
|
/*Add form_data*/
|
|
for (i = 0; i < (int)form_data->count; i++) {
|
|
if (is_url((const char *)form_data->urls[i], strlen((const char *)form_data->urls[i]))) {
|
|
if (NULL == ary) {
|
|
ary = cli_jsonarray(ctx->this_layer_metadata_json, HTML_URIS_JSON_KEY);
|
|
if (!ary) {
|
|
cli_dbgmsg("[cli_scanhtml] Failed to add \"%s\" entry JSON array\n", HTML_URIS_JSON_KEY);
|
|
return;
|
|
}
|
|
}
|
|
cli_jsonstr(ary, NULL, (const char *)form_data->urls[i]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static cl_error_t cli_scanhtml(cli_ctx *ctx)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
char *tempname = NULL;
|
|
char fullname[1024];
|
|
int fd = -1;
|
|
fmap_t *map = ctx->fmap;
|
|
uint64_t curr_len = map->len;
|
|
|
|
cli_dbgmsg("in cli_scanhtml()\n");
|
|
|
|
/* CL_ENGINE_MAX_HTMLNORMALIZE */
|
|
if (curr_len > ctx->engine->maxhtmlnormalize) {
|
|
cli_dbgmsg("cli_scanhtml: exiting (file larger than MaxHTMLNormalize)\n");
|
|
status = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
|
|
if (NULL == (tempname = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "html-tmp"))) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
if (mkdir(tempname, 0700)) {
|
|
cli_errmsg("cli_scanhtml: Can't create temporary directory %s\n", tempname);
|
|
status = CL_ETMPDIR;
|
|
goto done;
|
|
}
|
|
|
|
cli_dbgmsg("cli_scanhtml: using tempdir %s\n", tempname);
|
|
|
|
/* Output JSON Summary Information */
|
|
if (SCAN_STORE_HTML_URIS && SCAN_COLLECT_METADATA && (ctx->this_layer_metadata_json != NULL)) {
|
|
tag_arguments_t hrefs = {0};
|
|
hrefs.scanContents = 1;
|
|
form_data_t form_data = {0};
|
|
(void)html_normalise_map_form_data(ctx, map, tempname, &hrefs, ctx->dconf, &form_data);
|
|
save_urls(ctx, &hrefs, &form_data);
|
|
html_tag_arg_free(&hrefs);
|
|
html_form_data_tag_free(&form_data);
|
|
} else {
|
|
(void)html_normalise_map(ctx, map, tempname, NULL, ctx->dconf);
|
|
}
|
|
|
|
snprintf(fullname, 1024, "%s" PATHSEP "nocomment.html", tempname);
|
|
fd = open(fullname, O_RDONLY | O_BINARY);
|
|
if (fd >= 0) {
|
|
// nocomment.html file exists, so lets scan it.
|
|
|
|
status = cli_scan_desc(fd, ctx, CL_TYPE_HTML, false, NULL, AC_SCAN_VIR, NULL, "no-comment", fullname, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
}
|
|
|
|
/* CL_ENGINE_MAX_HTMLNOTAGS */
|
|
curr_len = map->len;
|
|
if (curr_len > ctx->engine->maxhtmlnotags) {
|
|
/* we're not interested in scanning large files in notags form */
|
|
/* TODO: don't even create notags if file is over limit */
|
|
cli_dbgmsg("cli_scanhtml: skipping notags (normalized size over MaxHTMLNoTags)\n");
|
|
} else {
|
|
snprintf(fullname, 1024, "%s" PATHSEP "notags.html", tempname);
|
|
|
|
fd = open(fullname, O_RDONLY | O_BINARY);
|
|
if (fd >= 0) {
|
|
// notags.html file exists, so lets scan it.
|
|
|
|
status = cli_scan_desc(fd, ctx, CL_TYPE_HTML, false, NULL, AC_SCAN_VIR, NULL, "no-tags", fullname, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
}
|
|
}
|
|
|
|
snprintf(fullname, 1024, "%s" PATHSEP "javascript", tempname);
|
|
fd = open(fullname, O_RDONLY | O_BINARY);
|
|
if (fd >= 0) {
|
|
// javascript file exists, so lets scan it (twice, as different types).
|
|
|
|
status = cli_scan_desc(fd, ctx, CL_TYPE_HTML, false, NULL, AC_SCAN_VIR, NULL, "javascript-as-html", fullname, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
status = cli_scan_desc(fd, ctx, CL_TYPE_TEXT_ASCII, false, NULL, AC_SCAN_VIR, NULL, "javascript-as-text-ascii", fullname, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
close(fd);
|
|
fd = -1;
|
|
}
|
|
|
|
snprintf(fullname, 1024, "%s" PATHSEP "rfc2397", tempname);
|
|
|
|
status = cli_magic_scan_dir(fullname, ctx, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_EOPEN == status) {
|
|
/* If the directory doesn't exist, that's fine */
|
|
status = CL_SUCCESS;
|
|
} else {
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (fd >= 0) {
|
|
close(fd);
|
|
}
|
|
if (NULL != tempname) {
|
|
if (!ctx->engine->keeptmp) {
|
|
cli_rmdirs(tempname);
|
|
}
|
|
free(tempname);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_scanscript(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
const unsigned char *buff;
|
|
unsigned char *normalized = NULL;
|
|
struct text_norm_state state;
|
|
char *tmpname = NULL;
|
|
int ofd = -1;
|
|
struct cli_matcher *target_ac_root;
|
|
uint32_t maxpatlen, offset = 0;
|
|
struct cli_matcher *generic_ac_root;
|
|
struct cli_ac_data gmdata, tmdata;
|
|
int gmdata_initialized = 0;
|
|
int tmdata_initialized = 0;
|
|
struct cli_ac_data *mdata[2];
|
|
cl_fmap_t *new_map = NULL;
|
|
fmap_t *map;
|
|
size_t at = 0;
|
|
uint64_t curr_len;
|
|
struct cli_target_info info;
|
|
|
|
if (!ctx || !ctx->engine->root)
|
|
return CL_ENULLARG;
|
|
|
|
map = ctx->fmap;
|
|
curr_len = map->len;
|
|
generic_ac_root = ctx->engine->root[0];
|
|
target_ac_root = ctx->engine->root[7];
|
|
maxpatlen = target_ac_root ? target_ac_root->maxpatlen : 0;
|
|
|
|
// Initialize info so it's safe to pass to destroy later
|
|
cli_targetinfo_init(&info);
|
|
|
|
cli_dbgmsg("in cli_scanscript()\n");
|
|
|
|
/* CL_ENGINE_MAX_SCRIPTNORMALIZE */
|
|
if (curr_len > ctx->engine->maxscriptnormalize) {
|
|
cli_dbgmsg("cli_scanscript: exiting (file larger than MaxScriptSize)\n");
|
|
ret = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
|
|
if (!(normalized = malloc(SCANBUFF + maxpatlen))) {
|
|
cli_dbgmsg("cli_scanscript: Unable to malloc %u bytes\n", SCANBUFF);
|
|
ret = CL_EMEM;
|
|
goto done;
|
|
}
|
|
text_normalize_init(&state, normalized, SCANBUFF + maxpatlen);
|
|
|
|
if ((ret = cli_ac_initdata(&tmdata, target_ac_root ? target_ac_root->ac_partsigs : 0, target_ac_root ? target_ac_root->ac_lsigs : 0, target_ac_root ? target_ac_root->ac_reloff_num : 0, CLI_DEFAULT_AC_TRACKLEN))) {
|
|
goto done;
|
|
}
|
|
tmdata_initialized = 1;
|
|
|
|
if ((ret = cli_ac_initdata(&gmdata, generic_ac_root->ac_partsigs, generic_ac_root->ac_lsigs, generic_ac_root->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN))) {
|
|
goto done;
|
|
}
|
|
gmdata_initialized = 1;
|
|
|
|
/* dump to disk only if explicitly asked to
|
|
* or if necessary to check relative offsets,
|
|
* otherwise we can process just in-memory */
|
|
if (ctx->engine->keeptmp || (target_ac_root && (target_ac_root->ac_reloff_num > 0 || target_ac_root->linked_bcs))) {
|
|
if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &ofd))) {
|
|
cli_dbgmsg("cli_scanscript: Can't generate temporary file/descriptor\n");
|
|
goto done;
|
|
}
|
|
if (ctx->engine->keeptmp)
|
|
cli_dbgmsg("cli_scanscript: saving normalized file to %s\n", tmpname);
|
|
}
|
|
|
|
mdata[0] = &tmdata;
|
|
mdata[1] = &gmdata;
|
|
|
|
/* If there's a relative offset in target_ac_root or triggered bytecodes, normalize to file.*/
|
|
if (target_ac_root && (target_ac_root->ac_reloff_num > 0 || target_ac_root->linked_bcs)) {
|
|
size_t map_off = 0;
|
|
while (map_off < map->len) {
|
|
size_t written;
|
|
if (!(written = text_normalize_map(&state, map, map_off)))
|
|
break;
|
|
map_off += written;
|
|
|
|
if (write(ofd, state.out, state.out_pos) == -1) {
|
|
cli_errmsg("cli_scanscript: can't write to file %s\n", tmpname);
|
|
ret = CL_EWRITE;
|
|
goto done;
|
|
}
|
|
text_normalize_reset(&state);
|
|
}
|
|
|
|
/* Temporarily store the normalized file map in the context. */
|
|
new_map = fmap_new(ofd, 0, 0, NULL, tmpname);
|
|
if (new_map == NULL) {
|
|
cli_dbgmsg("cli_scanscript: could not map file %s\n", tmpname);
|
|
goto done;
|
|
}
|
|
|
|
/* Perform cli_scan_fmap with child fmap */
|
|
ret = cli_recursion_stack_push(ctx, new_map, CL_TYPE_TEXT_ASCII, true, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != ret) {
|
|
cli_dbgmsg("Failed to scan fmap.\n");
|
|
goto done;
|
|
}
|
|
|
|
/* scan map */
|
|
ret = cli_scan_fmap(ctx, CL_TYPE_TEXT_ASCII, false, NULL, AC_SCAN_VIR, NULL);
|
|
|
|
(void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
} else {
|
|
/* Since the above is moderately costly all in all,
|
|
* do the old stuff if there's no relative offsets. */
|
|
|
|
if (target_ac_root) {
|
|
cli_targetinfo(&info, 7, ctx);
|
|
ret = cli_ac_caloff(target_ac_root, &tmdata, &info);
|
|
if (ret)
|
|
goto done;
|
|
}
|
|
|
|
while (1) {
|
|
size_t len = MIN(map->pgsz, map->len - at);
|
|
buff = fmap_need_off_once(map, at, len);
|
|
at += len;
|
|
if (!buff || !len || state.out_pos + len > state.out_len) {
|
|
/* flush if error/EOF, or too little buffer space left */
|
|
if ((ofd != -1) && (write(ofd, state.out, state.out_pos) == -1)) {
|
|
cli_errmsg("cli_scanscript: can't write to file %s\n", tmpname);
|
|
close(ofd);
|
|
ofd = -1;
|
|
/* we can continue to scan in memory */
|
|
}
|
|
/* when we flush the buffer also scan */
|
|
ret = cli_scan_buff(state.out, state.out_pos, offset, ctx, CL_TYPE_TEXT_ASCII, mdata);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
if (ctx->scanned)
|
|
*ctx->scanned += state.out_pos;
|
|
offset += state.out_pos;
|
|
|
|
/* carry over maxpatlen from previous buffer */
|
|
if (state.out_pos > maxpatlen)
|
|
memmove(state.out, state.out + state.out_pos - maxpatlen, maxpatlen);
|
|
text_normalize_reset(&state);
|
|
state.out_pos = maxpatlen;
|
|
}
|
|
if (!len)
|
|
break;
|
|
if (!buff || text_normalize_buffer(&state, buff, len) != len) {
|
|
cli_dbgmsg("cli_scanscript: short read during normalizing\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
ret = cli_exp_eval(ctx, target_ac_root, &tmdata, NULL);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_exp_eval(ctx, generic_ac_root, &gmdata, NULL);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (NULL != new_map) {
|
|
fmap_free(new_map);
|
|
}
|
|
|
|
cli_targetinfo_destroy(&info);
|
|
|
|
if (NULL != normalized) {
|
|
free(normalized);
|
|
}
|
|
|
|
if (tmdata_initialized) {
|
|
cli_ac_freedata(&tmdata);
|
|
}
|
|
|
|
if (gmdata_initialized) {
|
|
cli_ac_freedata(&gmdata);
|
|
}
|
|
|
|
if (ofd != -1) {
|
|
close(ofd);
|
|
}
|
|
|
|
if (tmpname != NULL) {
|
|
if (!ctx->engine->keeptmp) {
|
|
(void)cli_unlink(tmpname);
|
|
}
|
|
free(tmpname);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanhtml_utf16(cli_ctx *ctx)
|
|
{
|
|
cl_error_t status = CL_ERROR;
|
|
char *tempname = NULL;
|
|
char *decoded = NULL;
|
|
const char *buff;
|
|
int fd = -1;
|
|
int bytes;
|
|
size_t at = 0;
|
|
fmap_t *new_map = NULL;
|
|
|
|
cli_dbgmsg("in cli_scanhtml_utf16()\n");
|
|
|
|
if (!(tempname = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "html-utf16-tmp"))) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
if ((fd = open(tempname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
|
|
cli_errmsg("cli_scanhtml_utf16: Can't create file %s\n", tempname);
|
|
status = CL_EOPEN;
|
|
goto done;
|
|
}
|
|
|
|
cli_dbgmsg("cli_scanhtml_utf16: using tempfile %s\n", tempname);
|
|
|
|
while (at < ctx->fmap->len) {
|
|
bytes = MIN(ctx->fmap->len - at, ctx->fmap->pgsz * 16);
|
|
if (!(buff = fmap_need_off_once(ctx->fmap, at, bytes))) {
|
|
status = CL_EREAD;
|
|
goto done;
|
|
}
|
|
at += bytes;
|
|
decoded = cli_utf16toascii(buff, bytes);
|
|
if (decoded) {
|
|
if (write(fd, decoded, bytes / 2) == -1) {
|
|
cli_errmsg("cli_scanhtml_utf16: Can't write to file %s\n", tempname);
|
|
status = CL_EWRITE;
|
|
goto done;
|
|
}
|
|
free(decoded);
|
|
decoded = NULL;
|
|
}
|
|
}
|
|
|
|
new_map = fmap_new(fd, 0, 0, NULL, tempname);
|
|
if (NULL == new_map) {
|
|
cli_errmsg("cli_scanhtml_utf16: failed to create fmap for ascii HTML file decoded from utf16: %s\n.", tempname);
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
/* Perform exp_eval with child fmap */
|
|
status = cli_recursion_stack_push(ctx, new_map, CL_TYPE_HTML, true, LAYER_ATTRIBUTES_NORMALIZED);
|
|
if (CL_SUCCESS != status) {
|
|
cli_dbgmsg("Failed to scan fmap.\n");
|
|
goto done;
|
|
}
|
|
|
|
status = cli_scanhtml(ctx);
|
|
|
|
(void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
|
|
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (NULL != new_map) {
|
|
fmap_free(new_map);
|
|
}
|
|
if (-1 != fd) {
|
|
close(fd);
|
|
}
|
|
|
|
if (NULL != decoded) {
|
|
free(decoded);
|
|
}
|
|
|
|
if (NULL != tempname) {
|
|
if (!ctx->engine->keeptmp) {
|
|
(void)cli_unlink(tempname);
|
|
} else {
|
|
cli_dbgmsg("cli_scanhtml_utf16: Decoded HTML data saved in %s\n", tempname);
|
|
}
|
|
|
|
free(tempname);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_ole2_scan_tempdir(
|
|
cli_ctx *ctx,
|
|
const char *dir,
|
|
struct uniq *files,
|
|
int has_vba,
|
|
int has_xlm,
|
|
int has_image)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
DIR *dd = NULL;
|
|
int has_macros = 0;
|
|
|
|
struct dirent *dent;
|
|
STATBUF statbuf;
|
|
char *subdirectory = NULL;
|
|
|
|
cli_dbgmsg("cli_ole2_scan_tempdir: %s\n", dir);
|
|
|
|
/* Output JSON Summary Information */
|
|
if (SCAN_COLLECT_METADATA && (ctx->this_layer_metadata_json != NULL)) {
|
|
(void)cli_ole2_tempdir_scan_summary(dir, ctx, files);
|
|
}
|
|
|
|
status = cli_ole2_tempdir_scan_embedded_ole10(dir, ctx, files);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
if (has_vba) {
|
|
status = cli_ole2_tempdir_scan_vba(dir, ctx, files, &has_macros);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
|
|
status = cli_ole2_tempdir_scan_vba_new(dir, ctx, files, &has_macros);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
if (has_xlm) {
|
|
if (SCAN_HEURISTIC_MACROS) {
|
|
status = cli_append_potentially_unwanted(ctx, "Heuristics.OLE2.ContainsMacros.XLM");
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (has_xlm || has_image) {
|
|
/* TODO: Consider moving image extraction to handler_enum and
|
|
* removing the has_image and found_image stuff. */
|
|
status = cli_ole2_tempdir_scan_for_xlm_and_images(dir, ctx, files);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
if (has_xlm || has_vba) {
|
|
status = cli_magic_scan_dir(dir, ctx, LAYER_ATTRIBUTES_NONE);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/* ACAB: since we now hash filenames and handle collisions we
|
|
* could avoid recursion by removing the block below and by
|
|
* flattening the paths in ole2_walk_property_tree (case 1) */
|
|
|
|
if ((dd = opendir(dir)) != NULL) {
|
|
while ((dent = readdir(dd))) {
|
|
if (dent->d_ino) {
|
|
if (strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
|
|
/* build the full name */
|
|
subdirectory = malloc(strlen(dir) + strlen(dent->d_name) + 2);
|
|
if (!subdirectory) {
|
|
cli_dbgmsg("cli_ole2_tempdir_scan_vba: Unable to allocate memory for subdirectory path\n");
|
|
status = CL_EMEM;
|
|
break;
|
|
}
|
|
sprintf(subdirectory, "%s" PATHSEP "%s", dir, dent->d_name);
|
|
|
|
/* stat the file */
|
|
if (LSTAT(subdirectory, &statbuf) != -1) {
|
|
if (S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
|
|
/*
|
|
* Process subdirectory
|
|
*/
|
|
status = cli_ole2_scan_tempdir(
|
|
ctx,
|
|
subdirectory,
|
|
files,
|
|
has_vba,
|
|
has_xlm,
|
|
has_image);
|
|
if (CL_SUCCESS != status) {
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
free(subdirectory);
|
|
subdirectory = NULL;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
cli_dbgmsg("VBADir: Can't open directory %s.\n", dir);
|
|
status = CL_EOPEN;
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (NULL != dd) {
|
|
closedir(dd);
|
|
}
|
|
if (NULL != subdirectory) {
|
|
free(subdirectory);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t cli_scanole2(cli_ctx *ctx)
|
|
{
|
|
char *dir = NULL;
|
|
cl_error_t ret = CL_SUCCESS;
|
|
struct uniq *files = NULL;
|
|
int has_vba = 0;
|
|
int has_xlm = 0;
|
|
int has_image = 0;
|
|
|
|
cli_dbgmsg("in cli_scanole2()\n");
|
|
|
|
/* generate the temporary directory */
|
|
if (NULL == (dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "ole2-tmp"))) {
|
|
ret = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_dbgmsg("OLE2: Can't create temporary directory %s\n", dir);
|
|
free(dir);
|
|
dir = NULL;
|
|
ret = CL_ETMPDIR;
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_ole2_extract(dir, ctx, &files, &has_vba, &has_xlm, &has_image);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
if (files) {
|
|
/*
|
|
* Files containing the document summary, any VBA or XLM macros, or
|
|
* images were previously extracted from an ole2 file.
|
|
* This happens if cli_ole2_extract() executes the handler_writer()
|
|
* because XLM, VBA, or images were found.
|
|
* So now we need to process them.
|
|
*
|
|
* TODO: consider maybe processes all that stuff in memory instead of
|
|
* writing everything to temp files?
|
|
*/
|
|
ret = cli_ole2_scan_tempdir(
|
|
ctx,
|
|
dir,
|
|
files,
|
|
has_vba,
|
|
has_xlm,
|
|
has_image);
|
|
}
|
|
|
|
done:
|
|
if (files) {
|
|
uniq_free(files);
|
|
}
|
|
|
|
if (NULL != dir) {
|
|
if (!ctx->engine->keeptmp) {
|
|
cli_rmdirs(dir);
|
|
}
|
|
free(dir);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scantar(cli_ctx *ctx, unsigned int posix)
|
|
{
|
|
char *dir;
|
|
cl_error_t ret = CL_SUCCESS;
|
|
|
|
cli_dbgmsg("in cli_scantar()\n");
|
|
|
|
/* generate temporary directory */
|
|
if (!(dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "tar-tmp")))
|
|
return CL_EMEM;
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_errmsg("Tar: Can't create temporary directory %s\n", dir);
|
|
free(dir);
|
|
return CL_ETMPDIR;
|
|
}
|
|
|
|
ret = cli_untar(dir, posix, ctx);
|
|
|
|
if (!ctx->engine->keeptmp)
|
|
cli_rmdirs(dir);
|
|
|
|
free(dir);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanscrenc(cli_ctx *ctx)
|
|
{
|
|
char *tempname;
|
|
cl_error_t ret = CL_SUCCESS;
|
|
|
|
cli_dbgmsg("in cli_scanscrenc()\n");
|
|
|
|
if (!(tempname = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "screnc-tmp")))
|
|
return CL_EMEM;
|
|
|
|
if (mkdir(tempname, 0700)) {
|
|
cli_dbgmsg("CHM: Can't create temporary directory %s\n", tempname);
|
|
free(tempname);
|
|
return CL_ETMPDIR;
|
|
}
|
|
|
|
if (html_screnc_decode(ctx->fmap, tempname))
|
|
ret = cli_magic_scan_dir(tempname, ctx, LAYER_ATTRIBUTES_NONE);
|
|
|
|
if (!ctx->engine->keeptmp)
|
|
cli_rmdirs(tempname);
|
|
|
|
free(tempname);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanriff(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
|
|
if (cli_check_riff_exploit(ctx) == 2)
|
|
ret = cli_append_potentially_unwanted(ctx, "Heuristics.Exploit.W32.MS05-002");
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scancryptff(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS, ndesc;
|
|
unsigned int i;
|
|
const unsigned char *src;
|
|
unsigned char *dest = NULL;
|
|
char *tempfile;
|
|
size_t pos;
|
|
size_t bread;
|
|
|
|
/* Skip the CryptFF file header */
|
|
pos = 0x10;
|
|
|
|
if ((dest = (unsigned char *)malloc(FILEBUFF)) == NULL) {
|
|
cli_dbgmsg("CryptFF: Can't allocate memory\n");
|
|
return CL_EMEM;
|
|
}
|
|
|
|
if (!(tempfile = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "cryptff"))) {
|
|
free(dest);
|
|
return CL_EMEM;
|
|
}
|
|
|
|
if ((ndesc = open(tempfile, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR)) < 0) {
|
|
cli_errmsg("CryptFF: Can't create file %s\n", tempfile);
|
|
free(dest);
|
|
free(tempfile);
|
|
return CL_ECREAT;
|
|
}
|
|
|
|
for (; (src = fmap_need_off_once_len(ctx->fmap, pos, FILEBUFF, &bread)) && bread; pos += bread) {
|
|
for (i = 0; i < bread; i++)
|
|
dest[i] = src[i] ^ (unsigned char)0xff;
|
|
if (cli_writen(ndesc, dest, bread) == (size_t)-1) {
|
|
cli_dbgmsg("CryptFF: Can't write to descriptor %d\n", ndesc);
|
|
free(dest);
|
|
close(ndesc);
|
|
free(tempfile);
|
|
return CL_EWRITE;
|
|
}
|
|
}
|
|
|
|
free(dest);
|
|
|
|
cli_dbgmsg("CryptFF: Scanning decrypted data\n");
|
|
|
|
ret = cli_magic_scan_desc(ndesc, tempfile, ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
|
|
close(ndesc);
|
|
|
|
if (ctx->engine->keeptmp) {
|
|
cli_dbgmsg("CryptFF: Decompressed data saved in %s\n", tempfile);
|
|
} else {
|
|
if (CL_SUCCESS != cli_unlink(tempfile)) {
|
|
ret = CL_EUNLINK;
|
|
}
|
|
}
|
|
|
|
free(tempfile);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanpdf(cli_ctx *ctx, off_t offset)
|
|
{
|
|
cl_error_t ret;
|
|
char *dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "pdf-tmp");
|
|
|
|
if (!dir)
|
|
return CL_EMEM;
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_dbgmsg("Can't create temporary directory for PDF file %s\n", dir);
|
|
free(dir);
|
|
return CL_ETMPDIR;
|
|
}
|
|
|
|
ret = cli_pdf(dir, ctx, offset);
|
|
|
|
if (!ctx->engine->keeptmp)
|
|
cli_rmdirs(dir);
|
|
|
|
free(dir);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scantnef(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret;
|
|
char *dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "tnef-tmp");
|
|
|
|
if (!dir)
|
|
return CL_EMEM;
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_dbgmsg("Can't create temporary directory for tnef file %s\n", dir);
|
|
free(dir);
|
|
return CL_ETMPDIR;
|
|
}
|
|
|
|
ret = cli_tnef(dir, ctx);
|
|
|
|
if (ret == CL_SUCCESS)
|
|
ret = cli_magic_scan_dir(dir, ctx, LAYER_ATTRIBUTES_NONE);
|
|
|
|
if (!ctx->engine->keeptmp)
|
|
cli_rmdirs(dir);
|
|
|
|
free(dir);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanuuencoded(cli_ctx *ctx)
|
|
{
|
|
cl_error_t ret;
|
|
char *dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "uuencoded-tmp");
|
|
|
|
if (!dir)
|
|
return CL_EMEM;
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_dbgmsg("Can't create temporary directory for uuencoded file %s\n", dir);
|
|
free(dir);
|
|
return CL_ETMPDIR;
|
|
}
|
|
|
|
ret = cli_uuencode(dir, ctx->fmap);
|
|
|
|
if (ret == CL_SUCCESS)
|
|
ret = cli_magic_scan_dir(dir, ctx, LAYER_ATTRIBUTES_NONE);
|
|
|
|
if (!ctx->engine->keeptmp)
|
|
cli_rmdirs(dir);
|
|
|
|
free(dir);
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scanmail(cli_ctx *ctx)
|
|
{
|
|
char *dir = NULL;
|
|
cl_error_t ret;
|
|
|
|
cli_dbgmsg("Starting cli_scanmail()\n");
|
|
|
|
/* generate the temporary directory */
|
|
if (NULL == (dir = cli_gentemp_with_prefix(ctx->this_layer_tmpdir, "mail-tmp"))) {
|
|
ret = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
if (mkdir(dir, 0700)) {
|
|
cli_dbgmsg("Mail: Can't create temporary directory %s\n", dir);
|
|
ret = CL_ETMPDIR;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Extract the attachments into the temporary directory
|
|
*/
|
|
ret = cli_mbox(dir, ctx);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_magic_scan_dir(dir, ctx, LAYER_ATTRIBUTES_NONE);
|
|
if (CL_SUCCESS != ret) {
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
if (NULL != dir) {
|
|
if (!ctx->engine->keeptmp) {
|
|
cli_rmdirs(dir);
|
|
}
|
|
|
|
free(dir);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static cl_error_t cli_scan_structured(cli_ctx *ctx)
|
|
{
|
|
char buf[8192];
|
|
size_t result = 0;
|
|
unsigned int cc_count = 0;
|
|
unsigned int ssn_count = 0;
|
|
bool done = false;
|
|
fmap_t *map;
|
|
size_t pos = 0;
|
|
int (*ccfunc)(const unsigned char *buffer, size_t length, int cc_only);
|
|
int (*ssnfunc)(const unsigned char *buffer, size_t length);
|
|
|
|
if (ctx == NULL)
|
|
return CL_ENULLARG;
|
|
|
|
map = ctx->fmap;
|
|
|
|
if (ctx->engine->min_cc_count == 1)
|
|
ccfunc = dlp_has_cc;
|
|
else
|
|
ccfunc = dlp_get_cc_count;
|
|
|
|
switch (SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL | SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED) {
|
|
case (CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL | CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED):
|
|
if (ctx->engine->min_ssn_count == 1)
|
|
ssnfunc = dlp_has_ssn;
|
|
else
|
|
ssnfunc = dlp_get_ssn_count;
|
|
break;
|
|
|
|
case CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL:
|
|
if (ctx->engine->min_ssn_count == 1)
|
|
ssnfunc = dlp_has_normal_ssn;
|
|
else
|
|
ssnfunc = dlp_get_normal_ssn_count;
|
|
break;
|
|
|
|
case CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED:
|
|
if (ctx->engine->min_ssn_count == 1)
|
|
ssnfunc = dlp_has_stripped_ssn;
|
|
else
|
|
ssnfunc = dlp_get_stripped_ssn_count;
|
|
break;
|
|
|
|
default:
|
|
ssnfunc = NULL;
|
|
}
|
|
|
|
while (!done && ((result = fmap_readn(map, buf, pos, 8191)) > 0) && (result != (size_t)-1)) {
|
|
pos += result;
|
|
if ((cc_count += ccfunc((const unsigned char *)buf, result,
|
|
(ctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_CC) ? 1 : 0)) >= ctx->engine->min_cc_count) {
|
|
done = true;
|
|
}
|
|
|
|
if (ssnfunc && ((ssn_count += ssnfunc((const unsigned char *)buf, result)) >= ctx->engine->min_ssn_count)) {
|
|
done = true;
|
|
}
|
|
}
|
|
|
|
if (cc_count != 0 && cc_count >= ctx->engine->min_cc_count) {
|
|
cli_dbgmsg("cli_scan_structured: %u credit card numbers detected\n", cc_count);
|
|
if (CL_VIRUS == cli_append_potentially_unwanted(ctx, "Heuristics.Structured.CreditCardNumber")) {
|
|
return CL_VIRUS;
|
|
}
|
|
}
|
|
|
|
if (ssn_count != 0 && ssn_count >= ctx->engine->min_ssn_count) {
|
|
cli_dbgmsg("cli_scan_structured: %u social security numbers detected\n", ssn_count);
|
|
if (CL_VIRUS == cli_append_potentially_unwanted(ctx, "Heuristics.Structured.SSN")) {
|
|
return CL_VIRUS;
|
|
}
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
#if defined(_WIN32) || defined(C_LINUX) || defined(C_DARWIN)
|
|
#define PERF_MEASURE
|
|
#endif
|
|
|
|
#ifdef PERF_MEASURE
|
|
|
|
static struct
|
|
{
|
|
enum perfev id;
|
|
const char *name;
|
|
enum ev_type type;
|
|
} perf_events[] = {
|
|
{PERFT_SCAN, "full scan", ev_time},
|
|
{PERFT_PRECB, "prescan cb", ev_time},
|
|
{PERFT_POSTCB, "postscan cb", ev_time},
|
|
{PERFT_CACHE, "cache", ev_time},
|
|
{PERFT_FT, "filetype", ev_time},
|
|
{PERFT_CONTAINER, "container", ev_time},
|
|
{PERFT_SCRIPT, "script", ev_time},
|
|
{PERFT_PE, "pe", ev_time},
|
|
{PERFT_RAW, "raw", ev_time},
|
|
{PERFT_RAWTYPENO, "raw container", ev_time},
|
|
{PERFT_MAP, "map", ev_time},
|
|
{PERFT_BYTECODE, "bytecode", ev_time},
|
|
{PERFT_KTIME, "kernel", ev_int},
|
|
{PERFT_UTIME, "user", ev_int}};
|
|
|
|
static void get_thread_times(uint64_t *kt, uint64_t *ut)
|
|
{
|
|
#ifdef _WIN32
|
|
FILETIME c, e, k, u;
|
|
ULARGE_INTEGER kl, ul;
|
|
if (!GetThreadTimes(GetCurrentThread(), &c, &e, &k, &u)) {
|
|
*kt = *ut = 0;
|
|
return;
|
|
}
|
|
kl.LowPart = k.dwLowDateTime;
|
|
kl.HighPart = k.dwHighDateTime;
|
|
ul.LowPart = u.dwLowDateTime;
|
|
ul.HighPart = u.dwHighDateTime;
|
|
*kt = kl.QuadPart / 10;
|
|
*ut = ul.QuadPart / 10;
|
|
#else
|
|
struct tms tbuf;
|
|
if (times(&tbuf) != ((clock_t)-1)) {
|
|
clock_t tck = sysconf(_SC_CLK_TCK);
|
|
*kt = ((uint64_t)1000000) * tbuf.tms_stime / tck;
|
|
*ut = ((uint64_t)1000000) * tbuf.tms_utime / tck;
|
|
} else {
|
|
*kt = *ut = 0;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static inline void perf_init(cli_ctx *ctx)
|
|
{
|
|
uint64_t kt, ut;
|
|
unsigned i;
|
|
|
|
if (!SCAN_DEV_COLLECT_PERF_INFO)
|
|
return;
|
|
|
|
ctx->perf = cli_events_new(PERFT_LAST);
|
|
for (i = 0; i < sizeof(perf_events) / sizeof(perf_events[0]); i++) {
|
|
if (cli_event_define(ctx->perf, perf_events[i].id, perf_events[i].name,
|
|
perf_events[i].type, multiple_sum) == -1)
|
|
continue;
|
|
}
|
|
cli_event_time_start(ctx->perf, PERFT_SCAN);
|
|
get_thread_times(&kt, &ut);
|
|
cli_event_int(ctx->perf, PERFT_KTIME, -kt);
|
|
cli_event_int(ctx->perf, PERFT_UTIME, -ut);
|
|
}
|
|
|
|
static inline void perf_done(cli_ctx *ctx)
|
|
{
|
|
char timestr[512];
|
|
char *p;
|
|
unsigned i;
|
|
uint64_t kt, ut;
|
|
char *pend;
|
|
cli_events_t *perf = ctx->perf;
|
|
|
|
if (!perf)
|
|
return;
|
|
|
|
p = timestr;
|
|
pend = timestr + sizeof(timestr) - 1;
|
|
*pend = 0;
|
|
|
|
cli_event_time_stop(perf, PERFT_SCAN);
|
|
get_thread_times(&kt, &ut);
|
|
cli_event_int(perf, PERFT_KTIME, kt);
|
|
cli_event_int(perf, PERFT_UTIME, ut);
|
|
|
|
for (i = 0; i < sizeof(perf_events) / sizeof(perf_events[0]); i++) {
|
|
union ev_val val;
|
|
unsigned count;
|
|
|
|
cli_event_get(perf, perf_events[i].id, &val, &count);
|
|
if (p < pend)
|
|
p += snprintf(p, pend - p, "%s: %d.%03ums, ", perf_events[i].name,
|
|
(signed)(val.v_int / 1000),
|
|
(unsigned)(val.v_int % 1000));
|
|
}
|
|
*p = 0;
|
|
cli_infomsg(ctx, "performance: %s\n", timestr);
|
|
|
|
cli_events_free(perf);
|
|
ctx->perf = NULL;
|
|
}
|
|
|
|
static inline void perf_start(cli_ctx *ctx, int id)
|
|
{
|
|
cli_event_time_start(ctx->perf, id);
|
|
}
|
|
|
|
static inline void perf_stop(cli_ctx *ctx, int id)
|
|
{
|
|
cli_event_time_stop(ctx->perf, id);
|
|
}
|
|
|
|
static inline void perf_nested_start(cli_ctx *ctx, int id, int nestedid)
|
|
{
|
|
cli_event_time_nested_start(ctx->perf, id, nestedid);
|
|
}
|
|
|
|
static inline void perf_nested_stop(cli_ctx *ctx, int id, int nestedid)
|
|
{
|
|
cli_event_time_nested_stop(ctx->perf, id, nestedid);
|
|
}
|
|
|
|
#else
|
|
static inline void perf_init(cli_ctx *ctx)
|
|
{
|
|
UNUSEDPARAM(ctx);
|
|
}
|
|
static inline void perf_start(cli_ctx *ctx, int id)
|
|
{
|
|
UNUSEDPARAM(ctx);
|
|
UNUSEDPARAM(id);
|
|
}
|
|
static inline void perf_stop(cli_ctx *ctx, int id)
|
|
{
|
|
UNUSEDPARAM(ctx);
|
|
UNUSEDPARAM(id);
|
|
}
|
|
static inline void perf_nested_start(cli_ctx *ctx, int id, int nestedid)
|
|
{
|
|
UNUSEDPARAM(ctx);
|
|
UNUSEDPARAM(id);
|
|
UNUSEDPARAM(nestedid);
|
|
}
|
|
static inline void perf_nested_stop(cli_ctx *ctx, int id, int nestedid)
|
|
{
|
|
UNUSEDPARAM(ctx);
|
|
UNUSEDPARAM(id);
|
|
UNUSEDPARAM(nestedid);
|
|
}
|
|
static inline void perf_done(cli_ctx *ctx)
|
|
{
|
|
UNUSEDPARAM(ctx);
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* @brief Perform raw scan of current fmap.
|
|
*
|
|
* @param ctx Current scan context.
|
|
* @param type File type
|
|
* @param typercg Enable type recognition (file typing scan results).
|
|
* If 0, will be a regular ac-mode scan.
|
|
* @param[out] dettype If typercg enabled and scan detects HTML or MAIL types,
|
|
* will output HTML or MAIL types after performing HTML/MAIL scans
|
|
* @return cl_error_t
|
|
*/
|
|
static cl_error_t scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_t *dettype)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS, nret = CL_SUCCESS;
|
|
struct cli_matched_type *ftoffset = NULL, *fpt;
|
|
unsigned int acmode = AC_SCAN_VIR;
|
|
|
|
cli_file_t found_type;
|
|
|
|
if ((typercg) &&
|
|
// Omit embedded files or file types already identified via this process.
|
|
(!(ctx->recursion_stack[ctx->recursion_level].attributes & LAYER_ATTRIBUTES_EMBEDDED)) &&
|
|
// Omit GZ files because they can contain portions of original files like zip file entries that cause invalid extractions and lots of warnings. Decompress first, then scan!
|
|
(type != CL_TYPE_GZ) &&
|
|
// We should also omit bzips, but DMG's may be detected in bzips.
|
|
//(type != CL_TYPE_BZ) &&
|
|
// Omit CPIO_OLD files because it's an image format that we can extract and scan manually.
|
|
(type != CL_TYPE_CPIO_OLD) &&
|
|
// Omit ZIP files because it'll detect each zip file entry as SFXZIP, which is a waste. We'll extract it and then scan.
|
|
(type != CL_TYPE_ZIP) &&
|
|
// Omit OOXML because they are ZIP-based and file-type scanning will double-extract their contents.
|
|
(type != CL_TYPE_OOXML_WORD) &&
|
|
(type != CL_TYPE_OOXML_PPT) &&
|
|
(type != CL_TYPE_OOXML_XL) &&
|
|
(type != CL_TYPE_OOXML_HWP) &&
|
|
// Omit OLD TAR files because it's a raw archive format that we can extract and scan manually.
|
|
(type != CL_TYPE_OLD_TAR) &&
|
|
// Omit POSIX TAR files because it's a raw archive format that we can extract and scan manually.
|
|
(type != CL_TYPE_POSIX_TAR)) {
|
|
/*
|
|
* Enable file type recognition scan mode if requested, except for some problematic types (above).
|
|
*/
|
|
acmode |= AC_SCAN_FT;
|
|
} else {
|
|
cli_dbgmsg("scanraw: embedded type recognition disabled or not applicable for type %s %s\n",
|
|
cli_ftname(type),
|
|
(ctx->recursion_stack[ctx->recursion_level].attributes & LAYER_ATTRIBUTES_EMBEDDED) ? "(embedded layer)" : "");
|
|
}
|
|
|
|
perf_start(ctx, PERFT_RAW);
|
|
ret = cli_scan_fmap(ctx, type == CL_TYPE_TEXT_ASCII ? CL_TYPE_ANY : type, false, &ftoffset, acmode, NULL);
|
|
perf_stop(ctx, PERFT_RAW);
|
|
|
|
// In allmatch-mode, ret will never be CL_VIRUS, so ret may be used exclusively for file type detection and for terminal errors.
|
|
// When not in allmatch-mode, it's more important to return right away if ret is CL_VIRUS, so we don't care if file type matches were found.
|
|
if (ret >= CL_TYPENO) {
|
|
// Matched 1+ file type signatures. Handle them.
|
|
found_type = (cli_file_t)ret;
|
|
|
|
perf_nested_start(ctx, PERFT_RAWTYPENO, PERFT_SCAN);
|
|
|
|
fpt = ftoffset;
|
|
|
|
while (fpt) {
|
|
if (fpt->offset > 0) {
|
|
bool type_has_been_handled = true;
|
|
bool ancestor_was_embedded = false;
|
|
size_t i;
|
|
|
|
/*
|
|
* First, use "embedded type recognition" to identify a file's actual type.
|
|
* (a.k.a. not embedded files, but file type detection corrections)
|
|
*
|
|
* Do this at all fmap layers. Though we should only reassign the types
|
|
* if the current type makes sense for the reassignment.
|
|
*/
|
|
switch (fpt->type) {
|
|
case CL_TYPE_MHTML:
|
|
if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX)) {
|
|
if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
|
|
(ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
|
|
// HTML files may contain special characters and could be
|
|
// misidentified as BINARY_DATA by cli_compare_ftm_file()
|
|
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("MHTML signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = ret = cli_scanmail(ctx);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_XDP:
|
|
if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
|
|
if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
|
|
(ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
|
|
// XML files may contain special characters and could be
|
|
// misidentified as BINARY_DATA by cli_compare_ftm_file()
|
|
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("XDP signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = ret = cli_scanxdp(ctx);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_XML_WORD:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
|
|
if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
|
|
(ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
|
|
// XML files may contain special characters and could be
|
|
// misidentified as BINARY_DATA by cli_compare_ftm_file()
|
|
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("XML-WORD signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = ret = cli_scanmsxml(ctx);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case CL_TYPE_XML_XL:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML)) {
|
|
if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
|
|
(ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
|
|
// XML files may contain special characters and could be
|
|
// misidentified as BINARY_DATA by cli_compare_ftm_file()
|
|
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("XML-XL signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = ret = cli_scanmsxml(ctx);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case CL_TYPE_XML_HWP:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP)) {
|
|
if ((ctx->recursion_stack[ctx->recursion_level].type >= CL_TYPE_TEXT_ASCII) &&
|
|
(ctx->recursion_stack[ctx->recursion_level].type <= CL_TYPE_BINARY_DATA)) {
|
|
// XML files may contain special characters and could be
|
|
// misidentified as BINARY_DATA by cli_compare_ftm_file()
|
|
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("XML-HWP signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = ret = cli_scanhwpml(ctx);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_DMG:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_DMG)) {
|
|
// TODO: determine all types that DMG may start with
|
|
// if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_BZIP2) || ...))
|
|
{
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("DMG signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = cli_scandmg(ctx);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_ISO9660:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ISO9660)) {
|
|
// TODO: determine all types that ISO9660 may start with
|
|
// if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_ANY) || ...))
|
|
{
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("ISO signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = cli_scaniso(ctx, fpt->offset);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_UDF:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_UDF)) {
|
|
{
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, fpt->type, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("UDF signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = cli_scanudf(ctx, fpt->offset);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_MBR:
|
|
if (SCAN_PARSE_ARCHIVE) {
|
|
// TODO: determine all types that GPT or MBR may start with
|
|
// if ((ctx->recursion_stack[ctx->recursion_level].type == CL_TYPE_???) || ...))
|
|
{
|
|
// First check if actually a GPT, not MBR.
|
|
cl_error_t iret = cli_mbr_check2(ctx, 0);
|
|
|
|
if ((iret == CL_TYPE_GPT) && (DCONF_ARCH & ARCH_CONF_GPT)) {
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, CL_TYPE_GPT, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("Recognized GUID Partition Table file\n");
|
|
cli_dbgmsg("GPT signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = cli_scangpt(ctx, 0);
|
|
}
|
|
} else if ((iret == CL_SUCCESS) && (DCONF_ARCH & ARCH_CONF_MBR)) {
|
|
// Reassign type of current layer based on what we discovered
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, CL_TYPE_MBR, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
type_has_been_handled = false;
|
|
} else {
|
|
cli_dbgmsg("MBR signature found at %u\n", (unsigned int)fpt->offset);
|
|
nret = cli_scanmbr(ctx, 0);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
type_has_been_handled = false;
|
|
}
|
|
|
|
if ((CL_EMEM == nret) || ctx->abort_scan) {
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Only scan embedded files if we are not already in an embedded context.
|
|
* That is, if this or a previous layer was identified with embedded file type recognition, then we do
|
|
* not scan for embedded files again.
|
|
*
|
|
* This restriction will prevent detecting the same embedded content more than once when recursing with
|
|
* embedded file type recognition deeper within the same buffer.
|
|
*
|
|
* This is necessary because we have no way of knowing the length of a file for many formats and cannot
|
|
* prevent a search for embedded files from finding the same embedded content multiple times (like a LOT
|
|
* of times).
|
|
*
|
|
* E.g. if the file is like this:
|
|
*
|
|
* [ data ] [ embedded file ] [ data ] [ embedded file ]
|
|
*
|
|
* The first time we do it we'll find "two" embedded files, like this:
|
|
*
|
|
* Emb. File #1: [ embedded file ] [ data ] [ embedded file ]
|
|
* Emb. File #2: [ embedded file ]
|
|
*
|
|
* We must not scan Emb. File #1 again for embedded files, because it would double-extract Emb. File #2.
|
|
*
|
|
* There is a flaw in this logic, though. Suppose that we actually have:
|
|
*
|
|
* [ data ] [ compressed file w. recognizable magic bytes ]
|
|
*
|
|
* A first pass of the above will again identify "two" embedded files:
|
|
*
|
|
* Emb. File #1: [ compressed archive w. recognizable magic bytes ]
|
|
* Emb. File #2: [ magic bytes ] <- Compressed data/Not real file
|
|
*
|
|
* In this case, the magic bytes of a contained, compressed file is somehow still identifiable despite
|
|
* compression. The result is the Emb. File #2 will fail to be parsed and when we decompress Emb. File
|
|
* #1, then we maybe get something like this:
|
|
*
|
|
* Decompressed: [ data ] [ embedded file ]
|
|
*
|
|
* So if this happened... then we WOULD want to scan the decompressed file for embedded files.
|
|
* The problem is, we have no way of knowing how long embedded files are.
|
|
* We don't know if we have:
|
|
*
|
|
* A. [ data ] [ embedded file ] [ data ] [ embedded file ]
|
|
* or
|
|
* B. [ data ] [ embedded compressed archive w. recognizable magic bytes ]
|
|
* or
|
|
* C. [ data ] [ embedded uncompressed archive w. multiple file entries [ file 1 ] [ file 2 ] [ file 2 ] ]
|
|
*
|
|
* Some ideas for a more accurate solution:
|
|
*
|
|
* 1. Record the offset and size of each file extracted by the parsers.
|
|
* Then, when we do embedded file type recognition, we can check if the offset and size of the
|
|
* embedded file matches the offset and size of a file that was extracted by a parser.
|
|
* This falls apart a little bit for multiple layers of archives unless we also compare offsets within
|
|
* each layer. We could do that, but it would be a lot of work. And we'd probably want to take into
|
|
* consideration if files were decompressed or decrypted. ... I don't know a clean solution.
|
|
*
|
|
* 2. Have all parsers to run before embedded file type recognition and they each determine the length
|
|
* of the file they parsed, so we can differentiate between embedded files and appended files.
|
|
* For appended files, we would know they weren't extracted by a parser module and the parser for
|
|
* each of those would report the length of the file it parsed so we can use that to mitigate
|
|
* overlapping embedded file type recognition.
|
|
* But I highly doubt all file types can be parsed to determine the correct length of the file.
|
|
*/
|
|
for (i = ctx->recursion_level; i > 0; i--) {
|
|
if (ctx->recursion_stack[i].attributes & LAYER_ATTRIBUTES_EMBEDDED) {
|
|
// Found an ancestor that was embedded.
|
|
// Do not scan embedded files again.
|
|
ancestor_was_embedded = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Next, check for actual embedded files.
|
|
*/
|
|
if ((false == ancestor_was_embedded) &&
|
|
(false == type_has_been_handled)) {
|
|
cli_dbgmsg("%s signature found at %u\n", cli_ftname(fpt->type), (unsigned int)fpt->offset);
|
|
|
|
type_has_been_handled = true;
|
|
|
|
switch (fpt->type) {
|
|
case CL_TYPE_RARSFX:
|
|
if ((have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) &&
|
|
(type != CL_TYPE_RAR)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_RAR,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_EGGSFX:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG)) &&
|
|
(type != CL_TYPE_EGG)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_EGG,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_ZIPSFX:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) &&
|
|
(type != CL_TYPE_ZIP) &&
|
|
/* OOXML are ZIP-based. */
|
|
(type != CL_TYPE_OOXML_WORD) &&
|
|
(type != CL_TYPE_OOXML_PPT) &&
|
|
(type != CL_TYPE_OOXML_XL) &&
|
|
(type != CL_TYPE_OOXML_HWP)) {
|
|
// Header validity check to prevent false positives from being scanned.
|
|
size_t zip_size = 0;
|
|
|
|
ret = cli_unzip_single_header_check(ctx, fpt->offset, &zip_size);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_dbgmsg("ZIP single header check failed: %s (%d)\n", cl_strerror(ret), ret);
|
|
break;
|
|
}
|
|
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
zip_size,
|
|
ctx,
|
|
CL_TYPE_ZIP,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_CABSFX:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB)) &&
|
|
(type != CL_TYPE_MSCAB)) {
|
|
// Header validity check to prevent false positives from being scanned.
|
|
size_t cab_size = 0;
|
|
ret = cli_mscab_header_check(ctx, fpt->offset, &cab_size);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_dbgmsg("CAB header check failed: %s (%d)\n", cl_strerror(ret), ret);
|
|
break;
|
|
}
|
|
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
cab_size,
|
|
ctx,
|
|
CL_TYPE_MSCAB,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_ARJSFX:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ)) &&
|
|
(type != CL_TYPE_ARJ)) {
|
|
// Header validity check to prevent false positives from being scanned.
|
|
size_t arj_size = 0;
|
|
|
|
ret = cli_unarj_header_check(ctx, fpt->offset, &arj_size);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_dbgmsg("ARJ header check failed: %s (%d)\n", cl_strerror(ret), ret);
|
|
break;
|
|
}
|
|
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
arj_size,
|
|
ctx,
|
|
CL_TYPE_ARJ,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_7ZSFX:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z)) &&
|
|
(type != CL_TYPE_7Z)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_7Z,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_NULSFT:
|
|
// Note: CL_TYPE_NULSFT is special, because the file actually starts 4 bytes before the start of the signature match
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_NSIS)) &&
|
|
(type == CL_TYPE_MSEXE && fpt->offset > 4)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset - 4,
|
|
ctx->fmap->len - (fpt->offset - 4),
|
|
ctx,
|
|
CL_TYPE_NULSFT,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_AUTOIT:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_AUTOIT)) &&
|
|
(type == CL_TYPE_MSEXE)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_AUTOIT,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_ISHIELD_MSI:
|
|
if ((SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ISHIELD)) &&
|
|
(type == CL_TYPE_MSEXE)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_ISHIELD_MSI,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_PDF:
|
|
if ((SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) &&
|
|
(type != CL_TYPE_PDF)) {
|
|
// TODO: Add header validity check to prevent false positives from being scanned.
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_PDF,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_MSEXE:
|
|
if (SCAN_PARSE_PE && ctx->dconf->pe &&
|
|
(type == CL_TYPE_MSEXE || type == CL_TYPE_ZIP || type == CL_TYPE_MSOLE2)) {
|
|
struct cli_exe_info peinfo;
|
|
|
|
if ((uint64_t)(ctx->fmap->len - fpt->offset) > ctx->engine->maxembeddedpe) {
|
|
cli_dbgmsg("scanraw: MaxEmbeddedPE exceeded\n");
|
|
break;
|
|
}
|
|
|
|
cli_exe_info_init(&peinfo, 0);
|
|
|
|
// Header validity check to prevent false positives from being scanned.
|
|
ret = cli_peheader(ctx->fmap, &peinfo, CLI_PEHEADER_OPT_NONE, NULL);
|
|
|
|
// peinfo memory may have been allocated and must be freed even if it failed.
|
|
cli_exe_info_destroy(&peinfo);
|
|
|
|
if (CL_SUCCESS != ret) {
|
|
cli_dbgmsg("Header check for MSEXE detection failed, probably not actually an embedded PE file.\n");
|
|
break;
|
|
}
|
|
|
|
cli_dbgmsg("*** Detected embedded PE file at %u ***\n", (unsigned int)fpt->offset);
|
|
|
|
// Setting ctx->corrupted_input will prevent the PE parser from reporting "broken executable" for unpacked/reconstructed files that may not be 100% to spec.
|
|
// In here we're just carrying the corrupted_input flag from parent to child, in case the parent's flag was set.
|
|
unsigned int corrupted_input = ctx->corrupted_input;
|
|
|
|
ctx->corrupted_input = 1;
|
|
|
|
nret = cli_magic_scan_nested_fmap_type(
|
|
ctx->fmap,
|
|
fpt->offset,
|
|
// Sadly, there is no way from the PE header to determine the length of the PE file.
|
|
// So we just pass the remaining length of the fmap.
|
|
ctx->fmap->len - fpt->offset,
|
|
ctx,
|
|
CL_TYPE_MSEXE,
|
|
NULL,
|
|
LAYER_ATTRIBUTES_EMBEDDED);
|
|
|
|
ctx->corrupted_input = corrupted_input;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
type_has_been_handled = false;
|
|
cli_dbgmsg("scanraw: Type %u not handled in fpt loop\n", fpt->type);
|
|
}
|
|
|
|
} // end check for embedded files
|
|
|
|
} // end if (fpt->offset > 0)
|
|
|
|
if ((nret == CL_EMEM) ||
|
|
(ctx->abort_scan)) {
|
|
break;
|
|
}
|
|
|
|
fpt = fpt->next;
|
|
} // end while (fpt) loop
|
|
|
|
if (!((nret == CL_EMEM) || (ctx->abort_scan))) {
|
|
/*
|
|
* Now run the other file type parsers that may rely on file type
|
|
* recognition to determine the actual file type.
|
|
*/
|
|
switch (found_type) {
|
|
case CL_TYPE_HTML:
|
|
if (cli_recursion_stack_get_type(ctx, -2) == CL_TYPE_AUTOIT) {
|
|
/* bb#11196 - autoit script file misclassified as HTML */
|
|
ret = CL_TYPE_TEXT_ASCII;
|
|
} else if (SCAN_PARSE_HTML &&
|
|
(type == CL_TYPE_TEXT_ASCII ||
|
|
type == CL_TYPE_GIF) && /* Scan GIFs for embedded HTML/Javascript */
|
|
(DCONF_DOC & DOC_CONF_HTML)) {
|
|
*dettype = CL_TYPE_HTML;
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, CL_TYPE_HTML, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
} else {
|
|
nret = cli_scanhtml(ctx);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_MAIL:
|
|
if (SCAN_PARSE_MAIL && type == CL_TYPE_TEXT_ASCII && (DCONF_MAIL & MAIL_CONF_MBOX)) {
|
|
*dettype = CL_TYPE_MAIL;
|
|
if (CL_SUCCESS != (ret = cli_recursion_stack_change_type(ctx, CL_TYPE_MAIL, true))) {
|
|
cli_dbgmsg("Call to cli_recursion_stack_change_type() returned %s \n", cl_strerror(ret));
|
|
} else {
|
|
nret = cli_scanmail(ctx);
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
perf_nested_stop(ctx, PERFT_RAWTYPENO, PERFT_SCAN);
|
|
ret = nret;
|
|
} // end if (ret >= CL_TYPENO)
|
|
|
|
while (ftoffset) {
|
|
fpt = ftoffset;
|
|
ftoffset = ftoffset->next;
|
|
free(fpt);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void emax_reached(cli_ctx *ctx)
|
|
{
|
|
int32_t stack_index;
|
|
|
|
if (NULL == ctx || NULL == ctx->recursion_stack) {
|
|
return;
|
|
}
|
|
|
|
stack_index = (int32_t)ctx->recursion_level;
|
|
|
|
while (stack_index >= 0) {
|
|
fmap_t *map = ctx->recursion_stack[stack_index].fmap;
|
|
|
|
if (NULL != map) {
|
|
map->dont_cache_flag = true;
|
|
}
|
|
|
|
stack_index -= 1;
|
|
}
|
|
|
|
cli_dbgmsg("emax_reached: marked parents as non cacheable\n");
|
|
}
|
|
|
|
#define LINESTR(x) #x
|
|
#define LINESTR2(x) LINESTR(x)
|
|
#define __AT__ " at line " LINESTR2(__LINE__)
|
|
|
|
/**
|
|
* @brief Provide the following to the calling application for each embedded file:
|
|
* - name of parent file
|
|
* - size of parent file
|
|
* - name of current file
|
|
* - size of current file
|
|
* - pointer to the current file data
|
|
*
|
|
* @param cb
|
|
* @param ctx
|
|
* @param filetype
|
|
* @return cl_error_t
|
|
*/
|
|
static cl_error_t dispatch_file_inspection_callback(clcb_file_inspection cb, cli_ctx *ctx, const char *filetype)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t append_ret;
|
|
|
|
int fd = -1;
|
|
uint32_t fmap_index = ctx->recursion_level; /* index of current file */
|
|
|
|
cl_fmap_t *fmap = NULL;
|
|
const char *file_name = NULL;
|
|
size_t file_size = 0;
|
|
const char *file_buffer = NULL;
|
|
const char **ancestors = NULL;
|
|
|
|
size_t parent_file_size = 0;
|
|
|
|
if (NULL == cb) {
|
|
// Callback is not set.
|
|
goto done;
|
|
}
|
|
|
|
fmap = ctx->recursion_stack[fmap_index].fmap;
|
|
fd = fmap_fd(fmap);
|
|
|
|
CLI_MAX_CALLOC_OR_GOTO_DONE(ancestors, ctx->recursion_level + 1, sizeof(char *), status = CL_EMEM);
|
|
|
|
file_name = fmap->name;
|
|
file_buffer = fmap_need_off_once_len(fmap, 0, fmap->len, &file_size);
|
|
|
|
while (fmap_index > 0) {
|
|
cl_fmap_t *previous_fmap;
|
|
|
|
fmap_index -= 1;
|
|
previous_fmap = ctx->recursion_stack[fmap_index].fmap;
|
|
|
|
if (ctx->recursion_level > 0 && (fmap_index == ctx->recursion_level - 1)) {
|
|
parent_file_size = previous_fmap->len;
|
|
}
|
|
|
|
ancestors[fmap_index] = previous_fmap->name;
|
|
}
|
|
|
|
perf_start(ctx, PERFT_INSPECT);
|
|
status = cb(fd, filetype, ancestors, parent_file_size, file_name, file_size, file_buffer,
|
|
ctx->recursion_level, ctx->recursion_stack[ctx->recursion_level].attributes, ctx->cb_ctx);
|
|
perf_stop(ctx, PERFT_INSPECT);
|
|
|
|
switch (status) {
|
|
case CL_BREAK:
|
|
cli_dbgmsg("dispatch_file_inspection_callback: file trusted by callback\n");
|
|
|
|
// Remove any evidence for this layer and set the verdict to trusted.
|
|
(void)cli_trust_this_layer(ctx, "legacy file-inspection application callback");
|
|
|
|
break;
|
|
case CL_VIRUS:
|
|
cli_dbgmsg("dispatch_file_inspection_callback: file blocked by callback\n");
|
|
append_ret = cli_append_virus(ctx, "Detected.By.Callback.Inspection");
|
|
if (append_ret == CL_VIRUS) {
|
|
status = CL_VIRUS;
|
|
}
|
|
break;
|
|
case CL_SUCCESS:
|
|
// No action requested by callback. Keep scanning.
|
|
break;
|
|
default:
|
|
status = CL_SUCCESS;
|
|
cli_warnmsg("dispatch_file_inspection_callback: ignoring bad return code from callback\n");
|
|
}
|
|
|
|
done:
|
|
|
|
CLI_FREE_AND_SET_NULL(ancestors);
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t dispatch_prescan_callback(clcb_pre_scan cb, cli_ctx *ctx, const char *filetype, bool pre_cache)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t append_ret;
|
|
|
|
if (cb) {
|
|
perf_start(ctx, PERFT_PRECB);
|
|
status = cb(fmap_fd(ctx->fmap), filetype, ctx->cb_ctx);
|
|
perf_stop(ctx, PERFT_PRECB);
|
|
|
|
switch (status) {
|
|
case CL_BREAK: {
|
|
const char *source = pre_cache ? "legacy pre-cache application callback"
|
|
: "legacy pre-scan application callback";
|
|
|
|
cli_dbgmsg("dispatch_prescan_callback: file allowed by callback\n");
|
|
|
|
// Remove any evidence for this layer and set the verdict to trusted.
|
|
(void)cli_trust_this_layer(ctx, source);
|
|
|
|
status = CL_VERIFIED;
|
|
} break;
|
|
case CL_VIRUS: {
|
|
const char *alert_name = pre_cache ? "Detected.By.Callback.PreCache"
|
|
: "Detected.By.Callback.PreScan";
|
|
|
|
cli_dbgmsg("dispatch_prescan_callback: file blocked by callback\n");
|
|
|
|
append_ret = cli_append_virus(ctx, alert_name);
|
|
if (append_ret == CL_VIRUS) {
|
|
status = CL_VIRUS;
|
|
}
|
|
} break;
|
|
case CL_SUCCESS:
|
|
// No action requested by callback. Keep scanning.
|
|
break;
|
|
default:
|
|
status = CL_SUCCESS;
|
|
cli_warnmsg("dispatch_prescan_callback: ignoring bad return code from callback\n");
|
|
}
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
static cl_error_t calculate_fuzzy_image_hash(cli_ctx *ctx, cli_file_t type)
|
|
{
|
|
cl_error_t status = CL_EPARSE;
|
|
const uint8_t *offset = NULL;
|
|
image_fuzzy_hash_t hash = {0};
|
|
json_object *header = NULL;
|
|
|
|
FFIError *fuzzy_hash_calc_error = NULL;
|
|
|
|
offset = fmap_need_off(ctx->fmap, 0, ctx->fmap->real_len);
|
|
|
|
if (SCAN_COLLECT_METADATA && (NULL != ctx->this_layer_metadata_json)) {
|
|
if (NULL == (header = cli_jsonobj(ctx->this_layer_metadata_json, "ImageFuzzyHash"))) {
|
|
cli_errmsg("Failed to allocate ImageFuzzyHash JSON object\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
if (!fuzzy_hash_calculate_image(offset, ctx->fmap->real_len, hash.hash, 8, &fuzzy_hash_calc_error)) {
|
|
cli_dbgmsg("Failed to calculate image fuzzy hash for %s: %s\n",
|
|
cli_ftname(type),
|
|
ffierror_fmt(fuzzy_hash_calc_error));
|
|
|
|
if (SCAN_COLLECT_METADATA && (NULL != header)) {
|
|
(void)cli_jsonstr(header, "Error", ffierror_fmt(fuzzy_hash_calc_error));
|
|
}
|
|
|
|
goto done;
|
|
}
|
|
|
|
if (SCAN_COLLECT_METADATA && (NULL != header)) {
|
|
char hashstr[17];
|
|
snprintf(hashstr, 17, "%02x%02x%02x%02x%02x%02x%02x%02x",
|
|
hash.hash[0], hash.hash[1], hash.hash[2], hash.hash[3],
|
|
hash.hash[4], hash.hash[5], hash.hash[6], hash.hash[7]);
|
|
(void)cli_jsonstr(header, "Hash", hashstr);
|
|
}
|
|
|
|
ctx->recursion_stack[ctx->recursion_level].image_fuzzy_hash = hash;
|
|
ctx->recursion_stack[ctx->recursion_level].calculated_image_fuzzy_hash = true;
|
|
|
|
status = CL_SUCCESS;
|
|
|
|
done:
|
|
if (NULL != fuzzy_hash_calc_error) {
|
|
ffierror_free(fuzzy_hash_calc_error);
|
|
}
|
|
return status;
|
|
}
|
|
|
|
/**
|
|
* @brief A unified list of reasons why a scan result inside the magic_scan function
|
|
* should goto done instead of continuing to parse/scan this layer.
|
|
*
|
|
* These are not reasons why the scan should abort entirely. For that, just check ctx->abort_scan.
|
|
*
|
|
* @param ctx The scan context.
|
|
* @param result_in The result to compare.
|
|
* @param result_out The result that magic_scan should return.
|
|
* @return true We found a reason to goto done.
|
|
* @return false The scan must go on.
|
|
*/
|
|
static inline bool result_should_goto_done(cli_ctx *ctx, cl_error_t result_in, cl_error_t *result_out)
|
|
{
|
|
bool halt_scan = false;
|
|
|
|
if (NULL == ctx || NULL == result_out) {
|
|
cli_dbgmsg("Invalid arguments for file scan result check.\n");
|
|
halt_scan = true;
|
|
goto done;
|
|
}
|
|
|
|
if (NULL != ctx && ctx->abort_scan) {
|
|
// ensure abort_scan is respected
|
|
halt_scan = true;
|
|
}
|
|
|
|
switch (result_in) {
|
|
/*
|
|
* Reasons to halt the scan and report the error up to the caller/user.
|
|
*/
|
|
|
|
// A virus result means we should halt the scan.
|
|
// We do not return CL_VIRUS in allmatch-mode until the very end.
|
|
case CL_VIRUS:
|
|
|
|
// Each of these error codes considered terminal and will halt the scan.
|
|
case CL_EUNLINK:
|
|
case CL_ESTAT:
|
|
case CL_ESEEK:
|
|
case CL_EWRITE:
|
|
case CL_EDUP:
|
|
case CL_ETMPFILE:
|
|
case CL_ETMPDIR:
|
|
case CL_EMEM:
|
|
cli_dbgmsg("Descriptor[%d]: halting after file scan because: %s\n", fmap_fd(ctx->fmap), cl_strerror(result_in));
|
|
halt_scan = true;
|
|
*result_out = result_in;
|
|
break;
|
|
|
|
/*
|
|
* Reasons to halt the scan but report a successful scan.
|
|
*/
|
|
|
|
// Exceeding the time limit should definitely halt the scan.
|
|
// But unless the user enabled alert-exceeds-max, we don't want to complain about it.
|
|
case CL_ETIMEOUT:
|
|
|
|
// If the file was determined to be trusted, then we can stop scanning this layer. (Ex: EXE with a valid Authenticode sig.)
|
|
// Convert CL_VERIFIED to CL_SUCCESS because we don't want to propagate the CL_VERIFIED return code up to the caller.
|
|
// If we didn't, a trusted file could cause a larger archive containing non-trustworthy files to be trusted.
|
|
case CL_VERIFIED:
|
|
cli_dbgmsg("Descriptor[%d]: halting after file scan because: %s\n", fmap_fd(ctx->fmap), cl_strerror(result_in));
|
|
halt_scan = true;
|
|
*result_out = CL_SUCCESS;
|
|
break;
|
|
|
|
/*
|
|
* All other results must not halt the scan.
|
|
*/
|
|
|
|
// Nothing to do.
|
|
case CL_SUCCESS:
|
|
|
|
// Unless ctx->abort_scan was set, all these "MAX" conditions should finish scanning as much as is allowed.
|
|
// That is, the can may still be blocked from recursing into the next layer, or scanning new files or large files.
|
|
case CL_EMAXREC:
|
|
case CL_EMAXSIZE:
|
|
case CL_EMAXFILES:
|
|
|
|
// The following are explicitly listed here so you think twice before putting them in the scan-halt list, above.
|
|
// Malformed/truncated files could report as any of these three, and that's fine.
|
|
// See commit 087e7fc3fa923e5d6a6fd2efe8df852a36256b5b for additional details.
|
|
case CL_EFORMAT:
|
|
case CL_EPARSE:
|
|
case CL_EREAD:
|
|
case CL_EUNPACK:
|
|
|
|
default:
|
|
cli_dbgmsg("Descriptor[%d]: Continuing after file scan resulted with: %s\n",
|
|
fmap_fd(ctx->fmap), cl_strerror(result_in));
|
|
*result_out = CL_SUCCESS;
|
|
}
|
|
|
|
done:
|
|
return halt_scan;
|
|
}
|
|
|
|
cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t ret;
|
|
|
|
cl_error_t cache_check_result = CL_VIRUS;
|
|
cl_verdict_t verdict_at_this_level = CL_VERDICT_NOTHING_FOUND;
|
|
|
|
bool cache_enabled = true;
|
|
cli_file_t dettype = CL_TYPE_ANY;
|
|
uint8_t typercg = 1;
|
|
bitset_t *old_hook_lsig_matches = NULL;
|
|
const char *filetype;
|
|
|
|
if (!ctx->engine) {
|
|
cli_errmsg("CRITICAL: engine == NULL\n");
|
|
status = CL_ENULLARG;
|
|
goto early_ret;
|
|
}
|
|
|
|
if (!(ctx->engine->dboptions & CL_DB_COMPILED)) {
|
|
cli_errmsg("CRITICAL: engine not compiled\n");
|
|
status = CL_EMALFDB;
|
|
goto early_ret;
|
|
}
|
|
|
|
if (ctx->fmap->len <= 5) {
|
|
status = CL_SUCCESS;
|
|
cli_dbgmsg("cli_magic_scan: File is too small (%zu bytes), ignoring.\n", ctx->fmap->len);
|
|
goto early_ret;
|
|
}
|
|
|
|
if (cli_updatelimits(ctx, ctx->fmap->len) != CL_SUCCESS) {
|
|
emax_reached(ctx);
|
|
status = CL_SUCCESS;
|
|
cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", status, __AT__);
|
|
goto early_ret;
|
|
}
|
|
|
|
if (type == CL_TYPE_PART_ANY) {
|
|
typercg = 0;
|
|
}
|
|
|
|
/*
|
|
* Determine if caching is enabled.
|
|
* The application may have specifically disabled caching. Also, if the application never loaded any signatures,
|
|
* then the cache will be NULL and caching will also be disabled.
|
|
*/
|
|
if ((ctx->engine->engine_options & ENGINE_OPTIONS_DISABLE_CACHE) ||
|
|
(ctx->engine->cache == NULL)) {
|
|
cache_enabled = false;
|
|
}
|
|
|
|
/*
|
|
* Perform file typing from the start of the file.
|
|
*/
|
|
perf_start(ctx, PERFT_FT);
|
|
if ((type == CL_TYPE_ANY) || type == CL_TYPE_PART_ANY) {
|
|
type = cli_determine_fmap_type(ctx, type);
|
|
}
|
|
perf_stop(ctx, PERFT_FT);
|
|
if (type == CL_TYPE_ERROR) {
|
|
status = CL_EREAD;
|
|
cli_dbgmsg("cli_magic_scan: cli_determine_fmap_type returned CL_TYPE_ERROR\n");
|
|
cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", status, __AT__);
|
|
goto early_ret;
|
|
}
|
|
filetype = cli_ftname(type);
|
|
|
|
/* set current layer to the type we found */
|
|
ret = cli_recursion_stack_change_type(ctx, type, true /* ? */);
|
|
if (CL_SUCCESS != ret) {
|
|
cli_dbgmsg("cli_magic_scan: cli_recursion_stack_change_type returned %d\n", ret);
|
|
// We must go to done here (and not early_ret), because `ret` needs to be tidied up before returning.
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Run the pre_hash callback.
|
|
*/
|
|
ret = cli_dispatch_scan_callback(ctx, CL_SCAN_CALLBACK_PRE_HASH);
|
|
if (CL_SUCCESS != ret) {
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Run the deprecated pre_cache callback.
|
|
*/
|
|
ret = dispatch_prescan_callback(ctx->engine->cb_pre_cache, ctx, filetype, true /* pre_cache */);
|
|
if (CL_VERIFIED == ret || CL_VIRUS == ret) {
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Run the deprecated file_inspection callback.
|
|
*/
|
|
ret = dispatch_file_inspection_callback(ctx->engine->cb_file_inspection, ctx, filetype);
|
|
if (CL_SUCCESS != ret) {
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Record the file hash(es) in the JSON metadata before we do the cache check.
|
|
*/
|
|
if (SCAN_COLLECT_METADATA) {
|
|
uint8_t *hash = NULL;
|
|
char hash_string[SHA256_HASH_SIZE * 2 + 1];
|
|
bool need_hash[CLI_HASH_AVAIL_TYPES] = {false};
|
|
cli_hash_type_t hash_type;
|
|
|
|
need_hash[CLI_HASH_SHA2_256] = true;
|
|
if (SCAN_COLLECT_METADATA && SCAN_STORE_EXTRA_HASHES) {
|
|
need_hash[CLI_HASH_MD5] = true;
|
|
need_hash[CLI_HASH_SHA1] = true;
|
|
}
|
|
|
|
/* Set fmap to need hash later if required.
|
|
* This is an optimization so we can calculate all needed hashes in one pass. */
|
|
for (hash_type = CLI_HASH_MD5; hash_type < CLI_HASH_AVAIL_TYPES; hash_type++) {
|
|
if (need_hash[hash_type]) {
|
|
ret = fmap_will_need_hash_later(ctx->fmap, hash_type);
|
|
if (CL_SUCCESS != ret) {
|
|
cli_dbgmsg("cli_magic_scan: Failed to set fmap to need the %s hash later\n", cli_hash_name(hash_type));
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (hash_type = CLI_HASH_MD5; hash_type < CLI_HASH_AVAIL_TYPES; hash_type++) {
|
|
if (need_hash[hash_type]) {
|
|
size_t i;
|
|
size_t hash_len = cli_hash_len(hash_type);
|
|
|
|
/* If we need a hash, we will calculate it now */
|
|
ret = fmap_get_hash(ctx->fmap, &hash, hash_type);
|
|
if (CL_SUCCESS != ret || hash == NULL) {
|
|
cli_dbgmsg("cli_magic_scan: Failed to get a hash for the current fmap.\n");
|
|
// It may be that the file was truncated between the time we started the scan and the time we got the hash.
|
|
// Not a reason to print an error message.
|
|
status = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
|
|
/* Convert hash to string */
|
|
for (i = 0; i < hash_len; i++) {
|
|
sprintf(hash_string + i * 2, "%02x", hash[i]);
|
|
}
|
|
hash_string[hash_len * 2] = 0;
|
|
|
|
ret = cli_jsonstr(ctx->this_layer_metadata_json, cli_hash_name(hash_type), hash_string);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_dbgmsg("cli_magic_scan: Failed to store the %s hash in the metadata JSON.\n", cli_hash_name(hash_type));
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check if we've already scanned this file before.
|
|
*/
|
|
if (cache_enabled) {
|
|
perf_start(ctx, PERFT_CACHE);
|
|
cache_check_result = clean_cache_check(ctx);
|
|
perf_stop(ctx, PERFT_CACHE);
|
|
}
|
|
|
|
if (cache_enabled && (cache_check_result != CL_VIRUS)) {
|
|
status = CL_SUCCESS;
|
|
cli_dbgmsg("cli_magic_scan: returning %d %s (no post, no cache)\n", status, __AT__);
|
|
// We can go to early_ret here, because we know status is CL_SUCCESS, and we obviously add to the cache.
|
|
// This does mean, however, that we do not run the post-scan callback for layers that are cached.
|
|
goto early_ret;
|
|
}
|
|
|
|
/* Save off the hook_lsig_matches */
|
|
old_hook_lsig_matches = ctx->hook_lsig_matches;
|
|
ctx->hook_lsig_matches = NULL;
|
|
|
|
/*
|
|
* Run the pre_scan callback.
|
|
*/
|
|
ret = cli_dispatch_scan_callback(ctx, CL_SCAN_CALLBACK_PRE_SCAN);
|
|
if (CL_SUCCESS != ret) {
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Run the deprecated pre_scan callback.
|
|
*/
|
|
ret = dispatch_prescan_callback(ctx->engine->cb_pre_scan, ctx, filetype, false /* pre_cache */);
|
|
if (CL_VERIFIED == ret || CL_VIRUS == ret) {
|
|
status = ret;
|
|
goto done;
|
|
}
|
|
|
|
// If none of the scan options are enabled, then we can skip parsing and just do a raw pattern match.
|
|
// For this check, we don't care if the CL_SCAN_GENERAL_ALLMATCHES option is enabled, hence the `~`.
|
|
if (!((ctx->options->general & ~CL_SCAN_GENERAL_ALLMATCHES) || (ctx->options->parse) || (ctx->options->heuristic) || (ctx->options->mail) || (ctx->options->dev))) {
|
|
status = cli_scan_fmap(ctx, CL_TYPE_ANY, false, NULL, AC_SCAN_VIR, NULL);
|
|
// It doesn't matter what was returned, always go to the end after this. Raw mode! No parsing files!
|
|
goto done;
|
|
}
|
|
|
|
// We already saved the hook_lsig_matches (above)
|
|
// The ctx one is NULL at present.
|
|
ctx->hook_lsig_matches = cli_bitset_init();
|
|
if (NULL == ctx->hook_lsig_matches) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
if (type != CL_TYPE_IGNORED && ctx->engine->sdb) {
|
|
/*
|
|
* If self protection mechanism enabled, do the scanraw() scan first
|
|
* before extracting with a file type parser.
|
|
*/
|
|
cli_dbgmsg("cli_magic_scan: Performing raw scan to pattern match\n");
|
|
|
|
ret = scanraw(ctx, type, 0, &dettype);
|
|
|
|
// Evaluate the result from the scan to see if it end the scan of this layer early,
|
|
// and to decid if we should propagate an error or not.
|
|
if (result_should_goto_done(ctx, ret, &status)) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Run the file type parsers that we normally use before the raw scan.
|
|
*/
|
|
perf_nested_start(ctx, PERFT_CONTAINER, PERFT_SCAN);
|
|
switch (type) {
|
|
case CL_TYPE_IGNORED:
|
|
break;
|
|
|
|
case CL_TYPE_HWP3:
|
|
if (SCAN_PARSE_HWP3 && (DCONF_DOC & DOC_CONF_HWP))
|
|
ret = cli_scanhwp3(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_HWPOLE2:
|
|
if (SCAN_PARSE_OLE2 && (DCONF_ARCH & ARCH_CONF_OLE2))
|
|
ret = cli_scanhwpole2(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_XML_WORD:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
|
|
ret = cli_scanmsxml(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_XML_XL:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_MSXML))
|
|
ret = cli_scanmsxml(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_XML_HWP:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_HWP))
|
|
ret = cli_scanhwpml(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_XDP:
|
|
if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF))
|
|
ret = cli_scanxdp(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_RAR:
|
|
case CL_TYPE_RARSFX:
|
|
if (have_rar && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR))
|
|
ret = cli_scanrar(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_EGG:
|
|
case CL_TYPE_EGGSFX:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_EGG))
|
|
ret = cli_scanegg(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_ONENOTE:
|
|
if (SCAN_PARSE_ONENOTE && (DCONF_ARCH & DOC_CONF_ONENOTE))
|
|
ret = scan_onenote(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_ALZ:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ALZ)) {
|
|
ret = cli_scanalz(ctx);
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_LHA_LZH:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_LHA_LZH))
|
|
ret = scan_lha_lzh(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_OOXML_WORD:
|
|
case CL_TYPE_OOXML_PPT:
|
|
case CL_TYPE_OOXML_XL:
|
|
case CL_TYPE_OOXML_HWP:
|
|
if (SCAN_PARSE_XMLDOCS && (DCONF_DOC & DOC_CONF_OOXML)) {
|
|
if (SCAN_COLLECT_METADATA && (ctx->this_layer_metadata_json != NULL)) {
|
|
ret = cli_process_ooxml(ctx, type);
|
|
|
|
if (ret == CL_EMEM || ret == CL_ENULLARG) {
|
|
/* critical error */
|
|
break;
|
|
} else if (ret != CL_SUCCESS) {
|
|
/*
|
|
* non-critical return => allow for the CL_TYPE_ZIP scan to occur
|
|
* cli_process_ooxml other possible returns:
|
|
* CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE,
|
|
* CL_EFORMAT, CL_BREAK, CL_ESTAT
|
|
*/
|
|
ret = CL_SUCCESS;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Extract the OOXML contents */
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP))
|
|
ret = cli_unzip(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_ZIP:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) {
|
|
if (ctx->recursion_stack[ctx->recursion_level].attributes & LAYER_ATTRIBUTES_EMBEDDED) {
|
|
/* If this is an embedded ZIP found by scanraw() with file type detection,
|
|
* then we only extract a single zip entry. */
|
|
ret = cli_unzip_single(ctx, 0);
|
|
} else {
|
|
ret = cli_unzip(ctx);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case CL_TYPE_GZ:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GZ))
|
|
ret = cli_scangzip(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_BZ:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BZ))
|
|
ret = cli_scanbzip(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_XZ:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XZ))
|
|
ret = cli_scanxz(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_GPT:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GPT))
|
|
ret = cli_scangpt(ctx, 0);
|
|
break;
|
|
|
|
case CL_TYPE_APM:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_APM))
|
|
ret = cli_scanapm(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_ARJ:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ARJ))
|
|
ret = cli_scanarj(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_NULSFT:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_NSIS))
|
|
ret = cli_scannulsft(ctx, 0);
|
|
break;
|
|
|
|
case CL_TYPE_AUTOIT:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_AUTOIT))
|
|
ret = cli_scanautoit(ctx, 23);
|
|
break;
|
|
|
|
case CL_TYPE_MSSZDD:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SZDD))
|
|
ret = cli_scanszdd(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_MSCAB:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CAB))
|
|
ret = cli_scanmscab(ctx, 0);
|
|
break;
|
|
|
|
case CL_TYPE_HTML:
|
|
if (SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_HTML))
|
|
ret = cli_scanhtml(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_HTML_UTF16:
|
|
if (SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_HTML))
|
|
ret = cli_scanhtml_utf16(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_SCRIPT:
|
|
if ((DCONF_DOC & DOC_CONF_SCRIPT) && dettype != CL_TYPE_HTML)
|
|
ret = cli_scanscript(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_SWF:
|
|
if (SCAN_PARSE_SWF && (DCONF_DOC & DOC_CONF_SWF))
|
|
ret = cli_scanswf(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_RTF:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_DOC & DOC_CONF_RTF))
|
|
ret = cli_scanrtf(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_MAIL:
|
|
if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX))
|
|
ret = cli_scanmail(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_MHTML:
|
|
if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX))
|
|
ret = cli_scanmail(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_TNEF:
|
|
if (SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_TNEF))
|
|
ret = cli_scantnef(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_UUENCODED:
|
|
if (DCONF_OTHER & OTHER_CONF_UUENC)
|
|
ret = cli_scanuuencoded(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_MSCHM:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CHM))
|
|
ret = cli_scanmschm(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_MSOLE2:
|
|
if (SCAN_PARSE_OLE2 && (DCONF_ARCH & ARCH_CONF_OLE2))
|
|
ret = cli_scanole2(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_7Z:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_7Z))
|
|
ret = cli_7unz(ctx, 0);
|
|
break;
|
|
|
|
case CL_TYPE_POSIX_TAR:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_TAR))
|
|
ret = cli_scantar(ctx, 1);
|
|
break;
|
|
|
|
case CL_TYPE_OLD_TAR:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_TAR))
|
|
ret = cli_scantar(ctx, 0);
|
|
break;
|
|
|
|
case CL_TYPE_CPIO_OLD:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
|
|
ret = cli_scancpio_old(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_CPIO_ODC:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
|
|
ret = cli_scancpio_odc(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_CPIO_NEWC:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
|
|
ret = cli_scancpio_newc(ctx, 0);
|
|
break;
|
|
|
|
case CL_TYPE_CPIO_CRC:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_CPIO))
|
|
ret = cli_scancpio_newc(ctx, 1);
|
|
break;
|
|
|
|
case CL_TYPE_BINHEX:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_BINHEX))
|
|
ret = cli_binhex(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_SCRENC:
|
|
if (DCONF_OTHER & OTHER_CONF_SCRENC)
|
|
ret = cli_scanscrenc(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_RIFF:
|
|
if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_RIFF))
|
|
ret = cli_scanriff(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_GRAPHICS: {
|
|
if (SCAN_PARSE_IMAGE) {
|
|
/*
|
|
* This case is for unhandled graphics types such as BMP, JPEG 2000, etc.
|
|
*
|
|
* Note: JPEG 2000 is a very different format from JPEG, JPEG/JFIF, JPEG/Exif, JPEG/SPIFF (1994, 1997)
|
|
* JPEG 2000 is not handled by cli_parsejpeg.
|
|
*/
|
|
|
|
if (SCAN_PARSE_IMAGE_FUZZY_HASH && (DCONF_OTHER & OTHER_CONF_IMAGE_FUZZY_HASH)) {
|
|
// It's okay if it fails to calculate the fuzzy hash.
|
|
(void)calculate_fuzzy_image_hash(ctx, type);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case CL_TYPE_GIF: {
|
|
if (SCAN_PARSE_IMAGE && (DCONF_OTHER & OTHER_CONF_GIF)) {
|
|
if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA) {
|
|
/*
|
|
* Parse GIF files, checking for exploits and other file format issues.
|
|
*/
|
|
ret = cli_parsegif(ctx);
|
|
if (CL_SUCCESS != ret) {
|
|
// do not calculate the fuzzy image hash if parsing failed, or a heuristic alert occurred.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (SCAN_PARSE_IMAGE_FUZZY_HASH && (DCONF_OTHER & OTHER_CONF_IMAGE_FUZZY_HASH)) {
|
|
// It's okay if it fails to calculate the fuzzy hash.
|
|
(void)calculate_fuzzy_image_hash(ctx, type);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case CL_TYPE_PNG: {
|
|
if (SCAN_PARSE_IMAGE && (DCONF_OTHER & OTHER_CONF_PNG)) {
|
|
if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA) {
|
|
/*
|
|
* Parse PNG files, checking for exploits and other file format issues.
|
|
*/
|
|
ret = cli_parsepng(ctx); /* PNG parser detects a couple CVE's as well as Broken.Media */
|
|
if (CL_SUCCESS != ret) {
|
|
// do not calculate the fuzzy image hash if parsing failed, or a heuristic alert occurred.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (SCAN_PARSE_IMAGE_FUZZY_HASH && (DCONF_OTHER & OTHER_CONF_IMAGE_FUZZY_HASH)) {
|
|
// It's okay if it fails to calculate the fuzzy hash.
|
|
(void)calculate_fuzzy_image_hash(ctx, type);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case CL_TYPE_JPEG: {
|
|
if (SCAN_PARSE_IMAGE && (DCONF_OTHER & OTHER_CONF_JPEG)) {
|
|
if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA) {
|
|
/*
|
|
* Parse JPEG files, checking for exploits and other file format issues.
|
|
*
|
|
* Note: JPEG 2000 is a very different format from JPEG, JPEG/JFIF, JPEG/Exif, JPEG/SPIFF (1994, 1997)
|
|
* JPEG 2000 is not checked by cli_parsejpeg.
|
|
*/
|
|
ret = cli_parsejpeg(ctx); /* JPG parser detects MS04-028 exploits as well as Broken.Media */
|
|
if (CL_SUCCESS != ret) {
|
|
// do not calculate the fuzzy image hash if parsing failed, or a heuristic alert occurred.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (SCAN_PARSE_IMAGE_FUZZY_HASH && (DCONF_OTHER & OTHER_CONF_IMAGE_FUZZY_HASH)) {
|
|
// It's okay if it fails to calculate the fuzzy hash.
|
|
(void)calculate_fuzzy_image_hash(ctx, type);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case CL_TYPE_TIFF: {
|
|
if (SCAN_PARSE_IMAGE && (DCONF_OTHER & OTHER_CONF_TIFF)) {
|
|
if (SCAN_HEURISTICS && SCAN_HEURISTIC_BROKEN_MEDIA) {
|
|
/*
|
|
* Parse TIFF files, checking for exploits and other file format issues.
|
|
*/
|
|
ret = cli_parsetiff(ctx);
|
|
if (CL_SUCCESS != ret) {
|
|
// do not calculate the fuzzy image hash if parsing failed, or a heuristic alert occurred.
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (SCAN_PARSE_IMAGE_FUZZY_HASH && (DCONF_OTHER & OTHER_CONF_IMAGE_FUZZY_HASH)) {
|
|
// It's okay if it fails to calculate the fuzzy hash.
|
|
(void)calculate_fuzzy_image_hash(ctx, type);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case CL_TYPE_CRYPTFF:
|
|
if (DCONF_OTHER & OTHER_CONF_CRYPTFF)
|
|
ret = cli_scancryptff(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_ELF:
|
|
if (SCAN_PARSE_ELF && ctx->dconf->elf)
|
|
ret = cli_scanelf(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_MACHO:
|
|
if (ctx->dconf->macho)
|
|
ret = cli_scanmacho(ctx, NULL);
|
|
break;
|
|
|
|
case CL_TYPE_MACHO_UNIBIN:
|
|
if (ctx->dconf->macho)
|
|
ret = cli_scanmacho_unibin(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_SIS:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_SIS))
|
|
ret = cli_scansis(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_XAR:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_XAR))
|
|
ret = cli_scanxar(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_PART_HFSPLUS:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_HFSPLUS))
|
|
ret = cli_scanhfsplus(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_ISHIELD_MSI:
|
|
if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ISHIELD))
|
|
ret = cli_scanishield_msi(ctx, 14);
|
|
break;
|
|
|
|
case CL_TYPE_BINARY_DATA:
|
|
case CL_TYPE_TEXT_UTF16BE:
|
|
if (SCAN_HEURISTICS && (DCONF_OTHER & OTHER_CONF_MYDOOMLOG))
|
|
ret = cli_check_mydoom_log(ctx);
|
|
break;
|
|
|
|
case CL_TYPE_TEXT_ASCII:
|
|
if (SCAN_HEURISTIC_STRUCTURED && (DCONF_OTHER & OTHER_CONF_DLP))
|
|
/* TODO: consider calling this from cli_scanscript() for
|
|
* a normalised text
|
|
*/
|
|
ret = cli_scan_structured(ctx);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
perf_nested_stop(ctx, PERFT_CONTAINER, PERFT_SCAN);
|
|
|
|
// Evaluate the result from the parsers to see if it end the scan of this layer early,
|
|
// and to decide if we should propagate an error or not.
|
|
if (result_should_goto_done(ctx, ret, &status)) {
|
|
goto done;
|
|
}
|
|
|
|
/*
|
|
* Perform the raw scan, which may include file type recognition signatures.
|
|
*/
|
|
|
|
/* Disable type recognition for the raw scan for zip files larger than maxziptypercg */
|
|
if (type == CL_TYPE_ZIP && SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZIP)) {
|
|
/* CL_ENGINE_MAX_ZIPTYPERCG */
|
|
uint64_t curr_len = ctx->fmap->len;
|
|
if (curr_len > ctx->engine->maxziptypercg) {
|
|
cli_dbgmsg("cli_magic_scan: Not checking for embedded PEs (zip file > MaxZipTypeRcg)\n");
|
|
typercg = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Perform pattern matching for malware detections AND embedded file type recognition.
|
|
* Embedded file type recognition may re-assign the current file as a new type, or
|
|
* it may detect embedded files. E.g. ZIP entries in a PE file (i.e. self-extracting ZIP).
|
|
*/
|
|
if ((type != CL_TYPE_IGNORED) &&
|
|
/* CL_TYPE_HTML: raw HTML files are not scanned, unless safety measure activated via DCONF */
|
|
(type != CL_TYPE_HTML || !(SCAN_PARSE_HTML) || !(DCONF_DOC & DOC_CONF_HTML_SKIPRAW)) &&
|
|
(!ctx->engine->sdb)) {
|
|
|
|
cli_dbgmsg("cli_magic_scan: Performing raw scan to pattern match and/or detect embedded files\n");
|
|
|
|
ret = scanraw(ctx, type, typercg, &dettype);
|
|
|
|
// Evaluate the result from the scan to see if it end the scan of this layer early,
|
|
// and to decid if we should propagate an error or not.
|
|
if (result_should_goto_done(ctx, ret, &status)) {
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now run the rest of the file type parsers.
|
|
*/
|
|
switch (type) {
|
|
/* bytecode hooks triggered by a lsig must be a hook
|
|
* called from one of the functions here */
|
|
case CL_TYPE_TEXT_ASCII:
|
|
case CL_TYPE_TEXT_UTF16BE:
|
|
case CL_TYPE_TEXT_UTF16LE:
|
|
case CL_TYPE_TEXT_UTF8:
|
|
perf_nested_start(ctx, PERFT_SCRIPT, PERFT_SCAN);
|
|
if ((dettype != CL_TYPE_HTML) &&
|
|
SCAN_PARSE_HTML && (DCONF_DOC & DOC_CONF_SCRIPT) && (ret != CL_VIRUS)) {
|
|
ret = cli_scanscript(ctx);
|
|
}
|
|
if (((dettype == CL_TYPE_MAIL) || (cli_recursion_stack_get_type(ctx, -1) == CL_TYPE_MAIL)) &&
|
|
SCAN_PARSE_MAIL && (DCONF_MAIL & MAIL_CONF_MBOX) && (ret != CL_VIRUS)) {
|
|
|
|
ret = cli_scan_fmap(ctx, CL_TYPE_MAIL, false, NULL, AC_SCAN_VIR, NULL);
|
|
}
|
|
perf_nested_stop(ctx, PERFT_SCRIPT, PERFT_SCAN);
|
|
break;
|
|
|
|
/* Due to performance reasons all executables were first scanned
|
|
* in raw mode. Now we will try to unpack them
|
|
*/
|
|
case CL_TYPE_MSEXE:
|
|
perf_nested_start(ctx, PERFT_PE, PERFT_SCAN);
|
|
if (SCAN_PARSE_PE && ctx->dconf->pe) {
|
|
// Setting ctx->corrupted_input will prevent the PE parser from reporting "broken executable" for unpacked/reconstructed files that may not be 100% to spec.
|
|
// In here we're just carrying the corrupted_input flag from parent to child, in case the parent's flag was set.
|
|
unsigned int corrupted_input = ctx->corrupted_input;
|
|
ret = cli_scanpe(ctx);
|
|
ctx->corrupted_input = corrupted_input;
|
|
}
|
|
perf_nested_stop(ctx, PERFT_PE, PERFT_SCAN);
|
|
break;
|
|
|
|
case CL_TYPE_ELF:
|
|
perf_nested_start(ctx, PERFT_ELF, PERFT_SCAN);
|
|
ret = cli_unpackelf(ctx);
|
|
perf_nested_stop(ctx, PERFT_ELF, PERFT_SCAN);
|
|
break;
|
|
|
|
case CL_TYPE_MACHO:
|
|
case CL_TYPE_MACHO_UNIBIN:
|
|
perf_nested_start(ctx, PERFT_MACHO, PERFT_SCAN);
|
|
ret = cli_unpackmacho(ctx);
|
|
perf_nested_stop(ctx, PERFT_MACHO, PERFT_SCAN);
|
|
break;
|
|
|
|
case CL_TYPE_AI_MODEL:
|
|
case CL_TYPE_PYTHON_COMPILED:
|
|
case CL_TYPE_BINARY_DATA:
|
|
ret = cli_scan_fmap(ctx, CL_TYPE_OTHER, false, NULL, AC_SCAN_VIR, NULL);
|
|
break;
|
|
|
|
case CL_TYPE_PDF: /* FIXMELIMITS: pdf should be an archive! */
|
|
if (SCAN_PARSE_PDF && (DCONF_DOC & DOC_CONF_PDF)) {
|
|
ret = cli_scanpdf(ctx, 0);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
// Evaluate the result from the parsers to see if it end the scan of this layer early,
|
|
// and to decide if we should propagate an error or not.
|
|
if (result_should_goto_done(ctx, ret, &status)) {
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
|
|
/*
|
|
* Run the post_scan callback.
|
|
*/
|
|
ret = cli_dispatch_scan_callback(ctx, CL_SCAN_CALLBACK_POST_SCAN);
|
|
if (CL_SUCCESS != ret) {
|
|
cli_dbgmsg("cli_magic_scan: POST_SCAN callback returned %d\n", ret);
|
|
status = ret;
|
|
}
|
|
|
|
// Filter the result from the parsers so we don't propagate non-fatal errors.
|
|
// And to convert CL_VERIFIED -> CL_SUCCESS
|
|
(void)result_should_goto_done(ctx, status, &status);
|
|
|
|
/*
|
|
* Run the deprecated post-scan callback (if one exists) and provide the verdict for this layer.
|
|
*/
|
|
cli_dbgmsg("cli_magic_scan: returning %d %s\n", status, __AT__);
|
|
if (ctx->engine->cb_post_scan) {
|
|
cl_error_t callback_ret;
|
|
cl_error_t append_ret;
|
|
const char *virusname = NULL;
|
|
|
|
// Get the last signature that matched (if any).
|
|
if (0 < evidence_num_alerts(ctx->this_layer_evidence)) {
|
|
virusname = cli_get_last_virus(ctx);
|
|
}
|
|
|
|
perf_start(ctx, PERFT_POSTCB);
|
|
callback_ret = ctx->engine->cb_post_scan(fmap_fd(ctx->fmap), verdict_at_this_level, virusname, ctx->cb_ctx);
|
|
perf_stop(ctx, PERFT_POSTCB);
|
|
|
|
switch (callback_ret) {
|
|
case CL_BREAK:
|
|
cli_dbgmsg("cli_magic_scan: file allowed by post_scan callback\n");
|
|
|
|
// Remove any evidence for this layer and set the verdict to trusted.
|
|
(void)cli_trust_this_layer(ctx, "legacy post-scan application callback");
|
|
|
|
// status = CL_SUCCESS; // Do override the status here.
|
|
// If status == CL_VIRUS, we'll fix when we look at the verdict.
|
|
break;
|
|
case CL_VIRUS:
|
|
cli_dbgmsg("cli_magic_scan: file blocked by post_scan callback\n");
|
|
append_ret = cli_append_virus(ctx, "Detected.By.Callback");
|
|
if (append_ret == CL_VIRUS) {
|
|
status = CL_VIRUS;
|
|
}
|
|
break;
|
|
case CL_SUCCESS:
|
|
// No action requested by callback. Keep scanning.
|
|
break;
|
|
default:
|
|
// status = CL_SUCCESS; // Do override the status here, just log a warning.
|
|
cli_warnmsg("cli_magic_scan: ignoring bad return code from post_scan callback\n");
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check the verdict for this layer.
|
|
* If the verdict is CL_VERDICT_TRUSTED, remove any evidence for this layer and clear CL_VIRUS status (if set)
|
|
* Otherwise, we'll update the verdict based on the evidence.
|
|
*/
|
|
if (CL_VERDICT_TRUSTED == ctx->recursion_stack[ctx->recursion_level].verdict) {
|
|
/* Remove any alerts for this layer. */
|
|
if (NULL != ctx->recursion_stack[ctx->recursion_level].evidence) {
|
|
evidence_free(ctx->recursion_stack[ctx->recursion_level].evidence);
|
|
ctx->recursion_stack[ctx->recursion_level].evidence = NULL;
|
|
ctx->this_layer_evidence = NULL;
|
|
}
|
|
if (CL_VIRUS == status) {
|
|
status = CL_SUCCESS; // If we have a CL_VERDICT_TRUSTED, we should not return CL_VIRUS.
|
|
}
|
|
} else {
|
|
/*
|
|
* Update the verdict for this layer based on the scan results.
|
|
* If the verdict is CL_VERDICT_TRUSTED, then we don't change it.
|
|
*/
|
|
if (0 < evidence_num_indicators_type(ctx->this_layer_evidence, IndicatorType_Strong)) {
|
|
ctx->recursion_stack[ctx->recursion_level].verdict = CL_VERDICT_STRONG_INDICATOR;
|
|
} else if (0 < evidence_num_indicators_type(ctx->this_layer_evidence, IndicatorType_PotentiallyUnwanted)) {
|
|
ctx->recursion_stack[ctx->recursion_level].verdict = CL_VERDICT_POTENTIALLY_UNWANTED;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the verdict for this layer is "clean", we can cache it.
|
|
*
|
|
* Note: clean_cache_add() will check the fmap->dont_cache_flag,
|
|
* so this may not actually cache if we exceeded limits earlier.
|
|
* It will also check if caching is disabled.
|
|
*/
|
|
if ((CL_VERDICT_TRUSTED == ctx->recursion_stack[ctx->recursion_level].verdict) ||
|
|
(CL_VERDICT_NOTHING_FOUND == ctx->recursion_stack[ctx->recursion_level].verdict)) {
|
|
// Also verify we have no weak indicators before adding to the clean cache.
|
|
// Weak indicators may be used in the future to match a strong indicator.
|
|
if (evidence_num_indicators_type(ctx->this_layer_evidence, IndicatorType_Weak) == 0) {
|
|
perf_start(ctx, PERFT_CACHE);
|
|
clean_cache_add(ctx);
|
|
perf_stop(ctx, PERFT_CACHE);
|
|
}
|
|
}
|
|
|
|
early_ret:
|
|
|
|
if (old_hook_lsig_matches) {
|
|
/* We need to restore the old hook_lsig_matches */
|
|
cli_bitset_free(ctx->hook_lsig_matches); // safe to call, even if NULL
|
|
ctx->hook_lsig_matches = old_hook_lsig_matches;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cli_magic_scan_desc_type(int desc, const char *filepath, cli_ctx *ctx, cli_file_t type,
|
|
const char *name, uint32_t attributes)
|
|
{
|
|
STATBUF sb;
|
|
cl_error_t status = CL_SUCCESS;
|
|
fmap_t *new_map = NULL;
|
|
|
|
if (!ctx) {
|
|
return CL_EARG;
|
|
}
|
|
|
|
cli_dbgmsg("in cli_magic_scan_desc_type (recursion_level: %u/%u)\n", ctx->recursion_level, ctx->engine->max_recursion_level);
|
|
|
|
if (FSTAT(desc, &sb) == -1) {
|
|
cli_errmsg("cli_magic_scan_desc_type: Can't fstat descriptor %d\n", desc);
|
|
status = CL_ESTAT;
|
|
goto done;
|
|
}
|
|
if (sb.st_size <= 5) {
|
|
cli_dbgmsg("cli_magic_scan_desc_type: Small data (%u bytes)\n", (unsigned int)sb.st_size);
|
|
status = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
|
|
perf_start(ctx, PERFT_MAP);
|
|
new_map = fmap_new(desc, 0, sb.st_size, name, filepath);
|
|
perf_stop(ctx, PERFT_MAP);
|
|
if (NULL == new_map) {
|
|
cli_errmsg("cli_magic_scan_desc_type: CRITICAL: fmap_new() failed\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
status = cli_recursion_stack_push(ctx, new_map, type, true, attributes); /* Perform scan with child fmap */
|
|
if (CL_SUCCESS != status) {
|
|
cli_dbgmsg("Failed to scan fmap.\n");
|
|
goto done;
|
|
}
|
|
|
|
status = cli_magic_scan(ctx, type);
|
|
|
|
(void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
|
|
|
|
done:
|
|
if (NULL != new_map) {
|
|
fmap_free(new_map);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cli_magic_scan_desc(int desc, const char *filepath, cli_ctx *ctx, const char *name, uint32_t attributes)
|
|
{
|
|
return cli_magic_scan_desc_type(desc, filepath, ctx, CL_TYPE_ANY, name, attributes);
|
|
}
|
|
|
|
/**
|
|
* @brief Scan an offset/length into a file map.
|
|
*
|
|
* Magic-scan some portion of an existing fmap.
|
|
*
|
|
* @param map File map.
|
|
* @param offset Offset into file map.
|
|
* @param length Length from offset.
|
|
* @param ctx Scanning context structure.
|
|
* @param type CL_TYPE of data to be scanned.
|
|
* @param name (optional) Original name of the file (to set fmap name metadata)
|
|
* @return int CL_SUCCESS, or an error code.
|
|
*/
|
|
static cl_error_t magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t length, cli_ctx *ctx,
|
|
cli_file_t type, const char *name, uint32_t attributes)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
fmap_t *new_map = NULL;
|
|
|
|
cli_dbgmsg("magic_scan_nested_fmap_type: [0, +%zu), [%zu, +%zu)\n",
|
|
map->len, offset, length);
|
|
|
|
if (offset >= map->len) {
|
|
cli_dbgmsg("magic_scan_nested_fmap_type: Invalid offset: %zu\n", offset);
|
|
goto done;
|
|
}
|
|
|
|
if (!length)
|
|
length = map->len - offset;
|
|
|
|
if (length > map->len - offset) {
|
|
cli_dbgmsg("magic_scan_nested_fmap_type: Data truncated: %zu -> %zu\n",
|
|
length, map->len - offset);
|
|
length = map->len - offset;
|
|
}
|
|
|
|
if (length <= 5) {
|
|
cli_dbgmsg("magic_scan_nested_fmap_type: Small data (%zu bytes)\n", length);
|
|
goto done;
|
|
}
|
|
|
|
new_map = fmap_duplicate(map, offset, length, name);
|
|
if (NULL == new_map) {
|
|
cli_dbgmsg("magic_scan_nested_fmap_type: Failed to duplicate fmap for scan of fmap subsection\n");
|
|
goto done;
|
|
}
|
|
|
|
status = cli_recursion_stack_push(ctx, new_map, type, false, attributes); /* Perform scan with child fmap */
|
|
if (CL_SUCCESS != status) {
|
|
cli_dbgmsg("magic_scan_nested_fmap_type: Failed to add map to recursion stack for magic scan.\n");
|
|
goto done;
|
|
}
|
|
|
|
status = cli_magic_scan(ctx, type);
|
|
|
|
(void)cli_recursion_stack_pop(ctx); /* Restore the parent fmap */
|
|
|
|
done:
|
|
if (NULL != new_map) {
|
|
free_duplicate_fmap(new_map); /* This fmap is just a duplicate. */
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/* For map scans that may be forced to disk */
|
|
cl_error_t cli_magic_scan_nested_fmap_type(cl_fmap_t *map, size_t offset, size_t length, cli_ctx *ctx,
|
|
cli_file_t type, const char *name, uint32_t attributes)
|
|
{
|
|
cl_error_t ret = CL_SUCCESS;
|
|
|
|
cli_dbgmsg("cli_magic_scan_nested_fmap_type: [%zu, +%zu)\n", offset, length);
|
|
if (offset >= map->len) {
|
|
cli_dbgmsg("Invalid offset: %zu\n", offset);
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
if (ctx->engine->engine_options & ENGINE_OPTIONS_FORCE_TO_DISK) {
|
|
/*
|
|
* Force to disk!
|
|
*
|
|
* Write the offset + length section of the fmap to disk, and scan it.
|
|
*/
|
|
const uint8_t *mapdata = NULL;
|
|
char *tempfile = NULL;
|
|
int fd = -1;
|
|
size_t nread = 0;
|
|
|
|
/* Then check length */
|
|
if (!length) {
|
|
/* Caller didn't specify len, use rest of the map */
|
|
length = map->len - offset;
|
|
}
|
|
if (length > map->len - offset) {
|
|
cli_dbgmsg("cli_magic_scan_nested_fmap_type: Data truncated: %zu -> %zu\n", length, map->len - offset);
|
|
length = map->len - offset;
|
|
}
|
|
if (length <= 5) {
|
|
cli_dbgmsg("cli_magic_scan_nested_fmap_type: Small data (%u bytes)\n", (unsigned int)length);
|
|
return CL_SUCCESS;
|
|
}
|
|
if (!CLI_ISCONTAINED_0_TO(map->len, offset, length)) {
|
|
cli_dbgmsg("cli_magic_scan_nested_fmap_type: map error occurred [%zu, %zu] not within [0, %zu]\n", offset, length, map->len);
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/* Length checked, now get map */
|
|
mapdata = fmap_need_off_once_len(map, offset, length, &nread);
|
|
if (!mapdata || (nread != length)) {
|
|
cli_errmsg("cli_magic_scan_nested_fmap_type: could not map sub-file\n");
|
|
return CL_EMAP;
|
|
}
|
|
|
|
ret = cli_gentempfd(ctx->this_layer_tmpdir, &tempfile, &fd);
|
|
if (ret != CL_SUCCESS) {
|
|
return ret;
|
|
}
|
|
|
|
cli_dbgmsg("cli_magic_scan_nested_fmap_type: writing nested map content to temp file %s\n", tempfile);
|
|
if (cli_writen(fd, mapdata, length) == (size_t)-1) {
|
|
cli_errmsg("cli_magic_scan_nested_fmap_type: cli_writen error writing subdoc temporary file.\n");
|
|
ret = CL_EWRITE;
|
|
}
|
|
|
|
/* scan the temp file */
|
|
ret = cli_magic_scan_desc_type(fd, tempfile, ctx, type, name, attributes);
|
|
|
|
/* remove the temp file, if needed */
|
|
if (fd >= 0) {
|
|
close(fd);
|
|
}
|
|
if (!ctx->engine->keeptmp) {
|
|
if (cli_unlink(tempfile)) {
|
|
cli_errmsg("cli_magic_scan_nested_fmap_type: error unlinking tempfile %s\n", tempfile);
|
|
ret = CL_EUNLINK;
|
|
}
|
|
}
|
|
free(tempfile);
|
|
} else {
|
|
/*
|
|
* Not forced to disk.
|
|
*
|
|
* Just use nested map by scanning given fmap at offset + length.
|
|
*/
|
|
ret = magic_scan_nested_fmap_type(map, offset, length, ctx, type, name, attributes);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
cl_error_t cli_magic_scan_buff(const void *buffer, size_t length, cli_ctx *ctx, const char *name, uint32_t attributes)
|
|
{
|
|
cl_error_t ret;
|
|
fmap_t *map = NULL;
|
|
|
|
map = fmap_open_memory(buffer, length, name);
|
|
if (!map) {
|
|
return CL_EMAP;
|
|
}
|
|
|
|
ret = cli_magic_scan_nested_fmap_type(map, 0, length, ctx, CL_TYPE_ANY, name, attributes);
|
|
|
|
fmap_free(map);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* @brief The main function to initiate a scan of an fmap.
|
|
*
|
|
* @param map File map.
|
|
* @param filepath (optional, recommended) filepath of the open file descriptor or file map.
|
|
* @param[out] verdict_out A pointer to a cl_verdict_t that will be set to the scan verdict.
|
|
* You should check the verdict even if the function returns an error.
|
|
* @param[out] last_alert_out Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
|
|
* @param[out] scanned_out (Optional) The number of bytes scanned.
|
|
* @param engine The scanning engine.
|
|
* @param scanoptions Scanning options.
|
|
* @param[in,out] context (Optional) An application-defined context struct, opaque to libclamav.
|
|
* May be used within your callback functions.
|
|
* @param hash_hint (Optional) A NULL terminated string of the file hash so that
|
|
* libclamav does not need to calculate it.
|
|
* @param[out] hash_out (Optional) A NULL terminated string of the file hash.
|
|
* The caller is responsible for freeing this string.
|
|
* @param hash_alg The hashing algorithm used for either `hash_hint` or `hash_out`.
|
|
* Supported algorithms are "md5", "sha1", "sha2-256".
|
|
* Required only if you provide a `hash_hint` or want to receive a `hash_out`.
|
|
* @param file_type_hint (Optional) A NULL terminated string of the file type hint.
|
|
* E.g. "pe", "elf", "zip", etc.
|
|
* You may also use ClamAV type names such as "CL_TYPE_PE".
|
|
* ClamAV will ignore the hint if it is not familiar with the specified type.
|
|
* @param file_type_out (Optional) A NULL terminated string of the file type
|
|
* of the top layer as determined by ClamAV.
|
|
* Will take the form of the standard ClamAV file type format. E.g. "CL_TYPE_PE".
|
|
* The caller is responsible for freeing this string.
|
|
* @return cl_error_t CL_SUCCESS if no error occured.
|
|
* Otherwise a CL_E* error code.
|
|
* Does NOT return CL_VIRUS for a signature match. Check the `verdict_out` parameter instead.
|
|
*/
|
|
static cl_error_t scan_common(
|
|
cl_fmap_t *map,
|
|
const char *filepath,
|
|
cl_verdict_t *verdict_out,
|
|
const char **last_alert_out,
|
|
uint64_t *scanned_out,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context,
|
|
const char *hash_hint,
|
|
char **hash_out,
|
|
const char *hash_alg,
|
|
const char *file_type_hint,
|
|
char **file_type_out)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_error_t ret;
|
|
|
|
cli_ctx ctx = {0};
|
|
|
|
bool logg_initialized = false;
|
|
|
|
char *target_basename = NULL;
|
|
char *new_temp_prefix = NULL;
|
|
size_t new_temp_prefix_len;
|
|
char *new_temp_path = NULL;
|
|
|
|
time_t current_time;
|
|
struct tm tm_struct;
|
|
|
|
size_t num_potentially_unwanted_indicators = 0;
|
|
|
|
// The default type is SHA2-256.
|
|
cli_hash_type_t requested_hash_type = CLI_HASH_SHA2_256;
|
|
// The type of the file being scanned.
|
|
cli_file_t file_type = CL_TYPE_ANY;
|
|
|
|
if (NULL == map || NULL == scanoptions || NULL == verdict_out || NULL == last_alert_out || NULL == engine) {
|
|
return CL_ENULLARG;
|
|
}
|
|
|
|
/* Initialize output variables */
|
|
*verdict_out = CL_VERDICT_NOTHING_FOUND;
|
|
*last_alert_out = NULL;
|
|
|
|
// If the caller provided a file type hint, we make a best effort to use it.
|
|
if (file_type_hint) {
|
|
file_type = cli_ftcode_human_friendly(file_type_hint);
|
|
if (CL_TYPE_ERROR == file_type) {
|
|
cli_dbgmsg("scan_common: Unsupported file type hint: %s. Will treat it as unknown (CL_TYPE_ANY)\n", file_type_hint);
|
|
file_type = CL_TYPE_ANY;
|
|
}
|
|
}
|
|
|
|
if (NULL != hash_out) {
|
|
*hash_out = NULL;
|
|
}
|
|
|
|
if (NULL != hash_alg) {
|
|
// Set the fmap hash for the given algorithm.
|
|
if (3 == strlen(hash_alg) && (0 == strncmp(hash_alg, "md5", 3) || (0 == strncmp(hash_alg, "MD5", 3)))) {
|
|
requested_hash_type = CLI_HASH_MD5;
|
|
} else if (4 == strlen(hash_alg) && (0 == strncmp(hash_alg, "sha1", 4) || (0 == strncmp(hash_alg, "SHA1", 4)))) {
|
|
requested_hash_type = CLI_HASH_SHA1;
|
|
} else if ((8 == strlen(hash_alg) && (0 == strncmp(hash_alg, "sha2-256", 8) || (0 == strncmp(hash_alg, "SHA2-256", 8)))) ||
|
|
(6 == strlen(hash_alg) && (0 == strncmp(hash_alg, "sha256", 6) || (0 == strncmp(hash_alg, "SHA256", 6))))) {
|
|
requested_hash_type = CLI_HASH_SHA2_256;
|
|
} else {
|
|
cli_errmsg("scan_common: Unsupported hash algorithm: %s\n", hash_alg);
|
|
status = CL_EARG;
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
// If hash_hint is provided, we need to check if the hash_alg is valid.
|
|
if (NULL != hash_hint) {
|
|
uint8_t hash[CLI_HASHLEN_MAX] = {0};
|
|
size_t hash_string_len = strlen(hash_hint);
|
|
|
|
if (hash_string_len != cli_hash_len(requested_hash_type) * 2) {
|
|
cli_errmsg("scan_common: hash_hint provided, but its length (%zu) does not match the expected length for %s (%zu).\n",
|
|
hash_string_len, hash_alg, cli_hash_len(requested_hash_type) * 2);
|
|
status = CL_EARG;
|
|
goto done;
|
|
}
|
|
|
|
// Convert the hash_hint string to a binary hash.
|
|
ret = cli_hexstr_to_bytes(hash_hint, hash_string_len, hash);
|
|
if (ret != CL_SUCCESS) {
|
|
cli_errmsg("scan_common: hash_hint provided, but it is not a valid hex string.\n");
|
|
status = CL_EARG;
|
|
goto done;
|
|
}
|
|
// Set the fmap hash for the given algorithm.
|
|
if (CL_SUCCESS != fmap_set_hash(map, hash, requested_hash_type)) {
|
|
cli_errmsg("scan_common: Failed to set fmap hash for %s.\n", hash_alg);
|
|
status = CL_EARG;
|
|
goto done;
|
|
}
|
|
|
|
cli_dbgmsg("scan_common: recorded %s hash hint: %s\n", cli_hash_name(requested_hash_type), hash_hint);
|
|
}
|
|
|
|
ctx.engine = engine;
|
|
ctx.scanned = scanned_out;
|
|
CLI_MALLOC_OR_GOTO_DONE(ctx.options, sizeof(struct cl_scan_options), status = CL_EMEM);
|
|
|
|
memcpy(ctx.options, scanoptions, sizeof(struct cl_scan_options));
|
|
|
|
ctx.dconf = (struct cli_dconf *)engine->dconf;
|
|
ctx.cb_ctx = context;
|
|
|
|
if (!(ctx.hook_lsig_matches = cli_bitset_init())) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
ctx.recursion_stack_size = ctx.engine->max_recursion_level;
|
|
ctx.recursion_stack = calloc(sizeof(cli_scan_layer_t), ctx.recursion_stack_size);
|
|
if (!ctx.recursion_stack) {
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
// ctx was memset, so recursion_level starts at 0.
|
|
ctx.recursion_stack[ctx.recursion_level].fmap = map;
|
|
ctx.recursion_stack[ctx.recursion_level].size = map->len;
|
|
ctx.recursion_stack[ctx.recursion_level].type = CL_TYPE_ANY;
|
|
ctx.fmap = ctx.recursion_stack[ctx.recursion_level].fmap;
|
|
|
|
perf_init(&ctx);
|
|
|
|
if (ctx.engine->maxscantime != 0) {
|
|
if (gettimeofday(&ctx.time_limit, NULL) == 0) {
|
|
uint32_t secs = ctx.engine->maxscantime / 1000;
|
|
uint32_t usecs = (ctx.engine->maxscantime % 1000) * 1000;
|
|
ctx.time_limit.tv_sec += secs;
|
|
ctx.time_limit.tv_usec += usecs;
|
|
if (ctx.time_limit.tv_usec >= 1000000) {
|
|
ctx.time_limit.tv_usec -= 1000000;
|
|
ctx.time_limit.tv_sec++;
|
|
}
|
|
} else {
|
|
char buf[64];
|
|
cli_dbgmsg("scan_common: gettimeofday error: %s\n", cli_strerror(errno, buf, 64));
|
|
}
|
|
}
|
|
|
|
if (filepath != NULL) {
|
|
ctx.target_filepath = strdup(filepath);
|
|
}
|
|
|
|
/*
|
|
* Create a tmp sub-directory for the temp files generated by this scan.
|
|
*
|
|
* If keeptmp (LeaveTemporaryFiles / --leave-temps) is enabled, we'll include the
|
|
* basename in the tmp directory.
|
|
* If keeptmp is not enabled, we'll just call it "scantemp".
|
|
*/
|
|
current_time = time(NULL);
|
|
|
|
#ifdef _WIN32
|
|
if (0 != localtime_s(&tm_struct, ¤t_time)) {
|
|
#else
|
|
if (!localtime_r(¤t_time, &tm_struct)) {
|
|
#endif
|
|
cli_errmsg("scan_common: Failed to get local time.\n");
|
|
status = CL_ESTAT;
|
|
goto done;
|
|
}
|
|
|
|
if ((ctx.engine->engine_options & ENGINE_OPTIONS_TMPDIR_RECURSION)) {
|
|
if ((ctx.engine->keeptmp) &&
|
|
(NULL != ctx.target_filepath) &&
|
|
(CL_SUCCESS == cli_basename(ctx.target_filepath, strlen(ctx.target_filepath), &target_basename, true /* posix_support_backslash_pathsep */))) {
|
|
/* Include the basename in the temp directory */
|
|
new_temp_prefix_len = strlen("YYYYMMDD_HHMMSS-") + strlen(target_basename);
|
|
new_temp_prefix = cli_max_calloc(1, new_temp_prefix_len + 1);
|
|
if (!new_temp_prefix) {
|
|
cli_errmsg("scan_common: Failed to allocate memory for temp directory name.\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
strftime(new_temp_prefix, new_temp_prefix_len + 1, "%Y%m%d_%H%M%S-", &tm_struct);
|
|
strcpy(new_temp_prefix + strlen("YYYYMMDD_HHMMSS-"), target_basename);
|
|
} else {
|
|
/* Just use date */
|
|
new_temp_prefix_len = strlen("YYYYMMDD_HHMMSS-scantemp");
|
|
new_temp_prefix = cli_max_calloc(1, new_temp_prefix_len + 1);
|
|
if (!new_temp_prefix) {
|
|
cli_errmsg("scan_common: Failed to allocate memory for temp directory name.\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
strftime(new_temp_prefix, new_temp_prefix_len + 1, "%Y%m%d_%H%M%S-scantemp", &tm_struct);
|
|
}
|
|
|
|
/* Place the new temp sub-directory within the configured temp directory */
|
|
new_temp_path = cli_gentemp_with_prefix(ctx.engine->tmpdir, new_temp_prefix);
|
|
free(new_temp_prefix);
|
|
if (NULL == new_temp_path) {
|
|
cli_errmsg("scan_common: Failed to generate temp directory name.\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
ctx.recursion_stack[ctx.recursion_level].tmpdir = new_temp_path;
|
|
ctx.this_layer_tmpdir = new_temp_path;
|
|
|
|
if (mkdir(ctx.this_layer_tmpdir, 0700)) {
|
|
cli_errmsg("Can't create temporary directory for scan: %s.\n", ctx.this_layer_tmpdir);
|
|
status = CL_EACCES;
|
|
goto done;
|
|
}
|
|
} else {
|
|
/*
|
|
* Use the configured temp directory.
|
|
* Making a unique subdirectory per scan is slower, and particularly slow on Windows.
|
|
*/
|
|
ctx.recursion_stack[ctx.recursion_level].tmpdir = ctx.engine->tmpdir;
|
|
ctx.this_layer_tmpdir = ctx.engine->tmpdir;
|
|
}
|
|
|
|
cli_logg_setup(&ctx);
|
|
logg_initialized = true;
|
|
|
|
// Assign a unique object_id to the new container.
|
|
ctx.recursion_stack[ctx.recursion_level].object_id = ctx.object_count;
|
|
ctx.object_count++;
|
|
|
|
if (ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA) {
|
|
ctx.metadata_json = json_object_new_object();
|
|
if (NULL == ctx.metadata_json) {
|
|
cli_errmsg("scan_common: no memory for json properties object\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
/* Set the convenience pointer to the current properties object */
|
|
ctx.recursion_stack[ctx.recursion_level].metadata_json = ctx.metadata_json;
|
|
ctx.this_layer_metadata_json = ctx.metadata_json;
|
|
|
|
status = cli_jsonstr(ctx.metadata_json, "Magic", "CLAMJSONv0");
|
|
if (status != CL_SUCCESS) {
|
|
cli_errmsg("scan_common: error setting Magic property in metadata.json\n");
|
|
goto done;
|
|
}
|
|
if (ctx.fmap->name) {
|
|
status = cli_jsonstr(ctx.metadata_json, "FileName", ctx.fmap->name);
|
|
if (status != CL_SUCCESS) {
|
|
cli_errmsg("scan_common: error setting FileName property in metadata.json\n");
|
|
goto done;
|
|
}
|
|
}
|
|
if (ctx.fmap->path) {
|
|
status = cli_jsonstr(ctx.metadata_json, "FilePath", ctx.fmap->path);
|
|
if (status != CL_SUCCESS) {
|
|
cli_errmsg("scan_common: error setting FilePath property in metadata.json\n");
|
|
goto done;
|
|
}
|
|
}
|
|
status = cli_jsonuint64(ctx.metadata_json, "FileSize", (uint64_t)ctx.fmap->len);
|
|
if (status != CL_SUCCESS) {
|
|
cli_errmsg("scan_common: error setting FileSize property in metadata.json\n");
|
|
goto done;
|
|
}
|
|
status = cli_jsonuint64(ctx.metadata_json, "ObjectID", (uint64_t)ctx.recursion_stack[ctx.recursion_level].object_id);
|
|
if (status != CL_SUCCESS) {
|
|
cli_errmsg("scan_common: error setting ObjectID property in metadata.json\n");
|
|
goto done;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* DO THE SCAN!
|
|
*/
|
|
status = cli_magic_scan(&ctx, file_type);
|
|
|
|
if (ctx.options->general & CL_SCAN_GENERAL_COLLECT_METADATA && (ctx.metadata_json != NULL)) {
|
|
json_object *jobj;
|
|
const char *jstring;
|
|
|
|
/* set value of unique root object tag */
|
|
if (json_object_object_get_ex(ctx.metadata_json, "FileType", &jobj)) {
|
|
enum json_type type;
|
|
const char *jstr;
|
|
|
|
type = json_object_get_type(jobj);
|
|
if (type == json_type_string) {
|
|
jstr = json_object_get_string(jobj);
|
|
cli_jsonstr(ctx.metadata_json, "RootFileType", jstr);
|
|
}
|
|
}
|
|
|
|
/* serialize json properties to string */
|
|
#ifdef JSON_C_TO_STRING_NOSLASHESCAPE
|
|
jstring = json_object_to_json_string_ext(ctx.metadata_json, JSON_C_TO_STRING_PRETTY | JSON_C_TO_STRING_NOSLASHESCAPE);
|
|
#else
|
|
jstring = json_object_to_json_string_ext(ctx.metadata_json, JSON_C_TO_STRING_PRETTY);
|
|
#endif
|
|
if (NULL == jstring) {
|
|
cli_errmsg("scan_common: no memory for json serialization.\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
cli_dbgmsg("%s\n", jstring);
|
|
|
|
if (status != CL_VIRUS) {
|
|
/*
|
|
* Run bytecode preclass hook.
|
|
*/
|
|
struct cli_matcher *iroot = ctx.engine->root[13];
|
|
|
|
struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
|
|
if (!bc_ctx) {
|
|
cli_errmsg("scan_common: can't allocate memory for bc_ctx\n");
|
|
status = CL_EMEM;
|
|
} else {
|
|
cli_bytecode_context_setctx(bc_ctx, &ctx);
|
|
status = cli_bytecode_runhook(&ctx, ctx.engine, bc_ctx, BC_PRECLASS, map);
|
|
cli_bytecode_context_destroy(bc_ctx);
|
|
}
|
|
|
|
/* backwards compatibility: scan the json string unless a virus was detected */
|
|
if (status != CL_VIRUS && (iroot->ac_lsigs || iroot->ac_patterns || iroot->pcre_metas)) {
|
|
cli_dbgmsg("scan_common: running deprecated preclass bytecodes for target type 13\n");
|
|
ctx.options->general &= ~CL_SCAN_GENERAL_COLLECT_METADATA;
|
|
status = cli_magic_scan_buff(jstring, strlen(jstring), &ctx, NULL, LAYER_ATTRIBUTES_NONE);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Invoke file props callback.
|
|
*/
|
|
if (ctx.engine->cb_file_props != NULL) {
|
|
ret = ctx.engine->cb_file_props(jstring, status, ctx.cb_ctx);
|
|
if (ret != CL_SUCCESS) {
|
|
status = ret;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Write the file properties metadata JSON to metadata.json if keeptmp is enabled and temp-dir recursion is enabled.
|
|
* At present, the `metadata.json` filename is hardcoded, and cannot be written to a directory containing temp files from other scans.
|
|
*/
|
|
if ((ctx.engine->keeptmp) &&
|
|
(ctx.engine->engine_options & ENGINE_OPTIONS_TMPDIR_RECURSION)) {
|
|
|
|
int fd = -1;
|
|
char *tmpname = NULL;
|
|
|
|
if ((ret = cli_newfilepathfd(ctx.this_layer_tmpdir, "metadata.json", &tmpname, &fd)) != CL_SUCCESS) {
|
|
cli_dbgmsg("scan_common: Can't create json properties file, ret = %i.\n", ret);
|
|
} else {
|
|
if ((size_t)-1 == cli_writen(fd, jstring, strlen(jstring))) {
|
|
cli_dbgmsg("scan_common: cli_writen error writing json properties file.\n");
|
|
} else {
|
|
cli_dbgmsg("json written to: %s\n", tmpname);
|
|
}
|
|
}
|
|
if (fd != -1) {
|
|
close(fd);
|
|
}
|
|
if (NULL != tmpname) {
|
|
free(tmpname);
|
|
}
|
|
}
|
|
}
|
|
|
|
// If any alerts occurred, set the output pointer to the "latest" alert signature name.
|
|
if (0 < evidence_num_alerts(ctx.this_layer_evidence)) {
|
|
*last_alert_out = cli_get_last_virus_str(&ctx);
|
|
}
|
|
|
|
*verdict_out = ctx.recursion_stack[ctx.recursion_level].verdict;
|
|
|
|
/*
|
|
* Report PUA alerts here.
|
|
*/
|
|
num_potentially_unwanted_indicators = evidence_num_indicators_type(
|
|
ctx.this_layer_evidence,
|
|
IndicatorType_PotentiallyUnwanted);
|
|
if (0 != num_potentially_unwanted_indicators) {
|
|
// We have "potentially unwanted" indicators that would not have been reported yet.
|
|
// We may wish to report them now, ... depending ....
|
|
|
|
if (ctx.options->general & CL_SCAN_GENERAL_ALLMATCHES) {
|
|
// We're in allmatch mode, so report all "potentially unwanted" matches now.
|
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < num_potentially_unwanted_indicators; i++) {
|
|
const char *pua_alert = evidence_get_indicator(
|
|
ctx.this_layer_evidence,
|
|
IndicatorType_PotentiallyUnwanted,
|
|
i,
|
|
NULL, // Don't need to get the depth here.
|
|
NULL // Don't need to get the object ID here.
|
|
);
|
|
|
|
if (NULL != pua_alert) {
|
|
// We don't know exactly which layer the alert happened at.
|
|
// There's a decent chance it wasn't at this layer, and in that case we wouldn't
|
|
// even have access to that file anymore (it's gone!). So we'll pass back -1 for the
|
|
// file descriptor rather than using `cli_virus_found_cb() which would pass back
|
|
// The top level file descriptor.
|
|
if (ctx.engine->cb_virus_found) {
|
|
ctx.engine->cb_virus_found(
|
|
-1,
|
|
pua_alert,
|
|
ctx.cb_ctx);
|
|
}
|
|
}
|
|
}
|
|
|
|
} else {
|
|
// Not allmatch mode. Only want to report one thing...
|
|
if (0 == evidence_num_indicators_type(ctx.this_layer_evidence, IndicatorType_Strong)) {
|
|
// And it looks like we haven't reported anything else, so report the last "potentially unwanted" one.
|
|
// cli_get_last_virus() will do that, grabbing the last alerting indicator of any type.
|
|
cl_error_t callback_ret = CL_SUCCESS;
|
|
|
|
while ((CL_SUCCESS == callback_ret) &&
|
|
(0 < evidence_num_indicators_type(ctx.this_layer_evidence, IndicatorType_PotentiallyUnwanted))) {
|
|
callback_ret = cli_virus_found_cb(
|
|
&ctx,
|
|
cli_get_last_virus(&ctx),
|
|
IndicatorType_PotentiallyUnwanted);
|
|
// If the callback returned CL_SUCCESS then it will have also removed the indicator from evidence
|
|
// And we must loop around and report the next one.
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the caller requested a hash, we need to get it from the fmap.
|
|
*/
|
|
if (NULL != hash_out) {
|
|
// Allocate a buffer for the hash
|
|
size_t hash_len = cli_hash_len(requested_hash_type);
|
|
char *hash_string = malloc(hash_len * 2 + 1); // +1 for the null terminator
|
|
if (NULL == hash_string) {
|
|
cli_errmsg("scan_common: no memory for hash string buffer\n");
|
|
status = CL_EMEM;
|
|
} else {
|
|
// Get the hash from the fmap.
|
|
uint8_t *hash = NULL;
|
|
ret = fmap_get_hash(map, &hash, requested_hash_type);
|
|
if (CL_SUCCESS != ret || hash == NULL) {
|
|
cli_errmsg("scan_common: fmap_get_hash failed: %d\n", ret);
|
|
status = ret;
|
|
} else {
|
|
// Convert hash to string.
|
|
size_t i;
|
|
for (i = 0; i < hash_len; i++) {
|
|
sprintf(hash_string + i * 2, "%02x", hash[i]);
|
|
}
|
|
hash_string[hash_len * 2] = 0;
|
|
|
|
*hash_out = hash_string;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If the caller requested a file type, we need to get it from the fmap.
|
|
*/
|
|
if (NULL != file_type_out) {
|
|
const char *ftname = cli_ftname(ctx.recursion_stack[ctx.recursion_level].type);
|
|
if ((NULL == ftname) ||
|
|
(strcmp(ftname, "CL_TYPE_ANY") == 0)) {
|
|
cli_dbgmsg("scan_common: unknown file type.\n");
|
|
// Default to CL_TYPE_BINARY_DATA if we never determined the type.
|
|
*file_type_out = cli_safer_strdup("CL_TYPE_BINARY_DATA");
|
|
} else {
|
|
// Set the output pointer to the file type name.
|
|
*file_type_out = cli_safer_strdup(ftname);
|
|
}
|
|
}
|
|
|
|
done:
|
|
|
|
if (logg_initialized) {
|
|
cli_logg_unsetup();
|
|
}
|
|
|
|
if (NULL != ctx.metadata_json) {
|
|
cli_json_delobj(ctx.metadata_json);
|
|
}
|
|
|
|
if ((NULL != ctx.engine) &&
|
|
(ctx.engine->engine_options & ENGINE_OPTIONS_TMPDIR_RECURSION) &&
|
|
(NULL != ctx.this_layer_tmpdir)) {
|
|
|
|
if (!ctx.engine->keeptmp) {
|
|
(void)cli_rmdirs(ctx.this_layer_tmpdir);
|
|
}
|
|
free(ctx.this_layer_tmpdir);
|
|
} else {
|
|
// If we didn't create a temp directory, we don't need to free it,
|
|
// and have to trust that all temp files were cleaned up by their respective modules.
|
|
}
|
|
|
|
if (NULL != target_basename) {
|
|
free(target_basename);
|
|
}
|
|
|
|
if (NULL != ctx.target_filepath) {
|
|
free(ctx.target_filepath);
|
|
}
|
|
|
|
if (NULL != ctx.perf) {
|
|
perf_done(&ctx);
|
|
}
|
|
|
|
if (NULL != ctx.hook_lsig_matches) {
|
|
cli_bitset_free(ctx.hook_lsig_matches);
|
|
}
|
|
|
|
if (NULL != ctx.recursion_stack) {
|
|
if (NULL != ctx.recursion_stack[ctx.recursion_level].evidence) {
|
|
evidence_free(ctx.recursion_stack[ctx.recursion_level].evidence);
|
|
}
|
|
|
|
free(ctx.recursion_stack);
|
|
}
|
|
|
|
if (NULL != ctx.options) {
|
|
free(ctx.options);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scandesc(
|
|
int desc,
|
|
const char *filename,
|
|
const char **virname,
|
|
unsigned long int *scanned,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions)
|
|
{
|
|
cl_error_t status;
|
|
uint64_t scanned_out;
|
|
cl_verdict_t verdict_out = CL_VERDICT_NOTHING_FOUND;
|
|
|
|
status = cl_scandesc_ex(
|
|
desc,
|
|
filename,
|
|
&verdict_out,
|
|
virname,
|
|
&scanned_out,
|
|
engine,
|
|
scanoptions,
|
|
NULL, // void *context,
|
|
NULL, // const char *hash_hint,
|
|
NULL, // char **hash_out,
|
|
NULL, // const char *hash_alg,
|
|
NULL, // const char *file_type_hint,
|
|
NULL); // char **file_type_out);
|
|
|
|
if (NULL != scanned) {
|
|
if ((SIZEOF_LONG == 4) &&
|
|
(scanned_out / CL_COUNT_PRECISION > UINT32_MAX)) {
|
|
cli_warnmsg("cl_scanfile_callback: scanned_out exceeds UINT32_MAX, setting to UINT32_MAX\n");
|
|
*scanned = UINT32_MAX;
|
|
} else {
|
|
*scanned = (unsigned long int)(scanned_out / CL_COUNT_PRECISION);
|
|
}
|
|
}
|
|
|
|
if (verdict_out == CL_VERDICT_STRONG_INDICATOR || verdict_out == CL_VERDICT_POTENTIALLY_UNWANTED) {
|
|
// Reporting "CL_VIRUS" is more important than reporting an error,
|
|
// because... unfortunately we can only do one with this API.
|
|
status = CL_VIRUS;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scandesc_callback(
|
|
int desc,
|
|
const char *filename,
|
|
const char **virname,
|
|
unsigned long int *scanned,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context)
|
|
{
|
|
cl_error_t status;
|
|
uint64_t scanned_bytes;
|
|
cl_verdict_t verdict_out = CL_VERDICT_NOTHING_FOUND;
|
|
|
|
status = cl_scandesc_ex(
|
|
desc,
|
|
filename,
|
|
&verdict_out,
|
|
virname,
|
|
&scanned_bytes,
|
|
engine,
|
|
scanoptions,
|
|
context,
|
|
NULL, // const char *hash_hint,
|
|
NULL, // char **hash_out,
|
|
NULL, // const char *hash_alg,
|
|
NULL, // const char *file_type_hint,
|
|
NULL); // char **file_type_out);
|
|
|
|
if (NULL != scanned) {
|
|
if ((SIZEOF_LONG == 4) &&
|
|
(scanned_bytes / CL_COUNT_PRECISION > UINT32_MAX)) {
|
|
cli_warnmsg("cl_scanfile_callback: scanned_bytes exceeds UINT32_MAX, setting to UINT32_MAX\n");
|
|
*scanned = UINT32_MAX;
|
|
} else {
|
|
*scanned = (unsigned long int)(scanned_bytes / CL_COUNT_PRECISION);
|
|
}
|
|
}
|
|
|
|
if (verdict_out == CL_VERDICT_STRONG_INDICATOR || verdict_out == CL_VERDICT_POTENTIALLY_UNWANTED) {
|
|
// Reporting "CL_VIRUS" is more important than reporting an error,
|
|
// because... unfortunately we can only do one with this API.
|
|
status = CL_VIRUS;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scandesc_ex(
|
|
int desc,
|
|
const char *filename,
|
|
cl_verdict_t *verdict_out,
|
|
const char **last_alert_out,
|
|
uint64_t *scanned_out,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context,
|
|
const char *hash_hint,
|
|
char **hash_out,
|
|
const char *hash_alg,
|
|
const char *file_type_hint,
|
|
char **file_type_out)
|
|
{
|
|
cl_error_t status = CL_SUCCESS;
|
|
cl_fmap_t *map = NULL;
|
|
STATBUF sb;
|
|
char *filename_base = NULL;
|
|
|
|
if (FSTAT(desc, &sb) == -1) {
|
|
cli_errmsg("cl_scandesc_callback: Can't fstat descriptor %d\n", desc);
|
|
status = CL_ESTAT;
|
|
goto done;
|
|
}
|
|
if (sb.st_size <= 5) {
|
|
cli_dbgmsg("cl_scandesc_callback: File too small (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
|
|
status = CL_SUCCESS;
|
|
goto done;
|
|
}
|
|
if ((engine->maxfilesize > 0) && ((uint64_t)sb.st_size > engine->maxfilesize)) {
|
|
cli_dbgmsg("cl_scandesc_callback: File too large (" STDu64 " bytes), ignoring\n", (uint64_t)sb.st_size);
|
|
if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
|
|
if (engine->cb_virus_found) {
|
|
engine->cb_virus_found(desc, "Heuristics.Limits.Exceeded.MaxFileSize", context);
|
|
if (last_alert_out) {
|
|
*last_alert_out = "Heuristics.Limits.Exceeded.MaxFileSize";
|
|
}
|
|
}
|
|
status = CL_VIRUS;
|
|
} else {
|
|
status = CL_SUCCESS;
|
|
}
|
|
goto done;
|
|
}
|
|
|
|
if (NULL != filename) {
|
|
(void)cli_basename(filename, strlen(filename), &filename_base, true /* posix_support_backslash_pathsep */);
|
|
}
|
|
|
|
if (NULL == (map = fmap_new(desc, 0, sb.st_size, filename_base, filename))) {
|
|
cli_errmsg("CRITICAL: fmap_new() failed\n");
|
|
status = CL_EMEM;
|
|
goto done;
|
|
}
|
|
|
|
status = scan_common(
|
|
map,
|
|
filename,
|
|
verdict_out,
|
|
last_alert_out,
|
|
scanned_out,
|
|
engine,
|
|
scanoptions,
|
|
context,
|
|
hash_hint,
|
|
hash_out,
|
|
hash_alg,
|
|
file_type_hint,
|
|
file_type_out);
|
|
|
|
done:
|
|
if (NULL != map) {
|
|
fmap_free(map);
|
|
}
|
|
if (NULL != filename_base) {
|
|
free(filename_base);
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scanmap_callback(
|
|
cl_fmap_t *map,
|
|
const char *filename,
|
|
const char **virname,
|
|
unsigned long int *scanned,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context)
|
|
{
|
|
cl_error_t status;
|
|
uint64_t scanned_bytes;
|
|
cl_verdict_t verdict_out = CL_VERDICT_NOTHING_FOUND;
|
|
|
|
status = cl_scanmap_ex(
|
|
map,
|
|
filename,
|
|
&verdict_out,
|
|
virname,
|
|
&scanned_bytes,
|
|
engine,
|
|
scanoptions,
|
|
context,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL);
|
|
|
|
if (NULL != scanned) {
|
|
if ((SIZEOF_LONG == 4) &&
|
|
(scanned_bytes / CL_COUNT_PRECISION > UINT32_MAX)) {
|
|
cli_warnmsg("cl_scanfile_callback: scanned_bytes exceeds UINT32_MAX, setting to UINT32_MAX\n");
|
|
*scanned = UINT32_MAX;
|
|
} else {
|
|
*scanned = (unsigned long int)(scanned_bytes / CL_COUNT_PRECISION);
|
|
}
|
|
}
|
|
|
|
if (verdict_out == CL_VERDICT_STRONG_INDICATOR || verdict_out == CL_VERDICT_POTENTIALLY_UNWANTED) {
|
|
// Reporting "CL_VIRUS" is more important than reporting an error,
|
|
// because... unfortunately we can only do one with this API.
|
|
status = CL_VIRUS;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scanmap_ex(
|
|
cl_fmap_t *map,
|
|
const char *filename,
|
|
cl_verdict_t *verdict_out,
|
|
const char **last_alert_out,
|
|
uint64_t *scanned_out,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context,
|
|
const char *hash_hint,
|
|
char **hash_out,
|
|
const char *hash_alg,
|
|
const char *file_type_hint,
|
|
char **file_type_out)
|
|
{
|
|
if ((engine->maxfilesize > 0) && (map->len > engine->maxfilesize)) {
|
|
cli_dbgmsg("cl_scandesc_callback: File too large (%zu bytes), ignoring\n", map->len);
|
|
if (scanoptions->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) {
|
|
if (engine->cb_virus_found) {
|
|
engine->cb_virus_found(fmap_fd(map), "Heuristics.Limits.Exceeded.MaxFileSize", context);
|
|
if (last_alert_out) {
|
|
*last_alert_out = "Heuristics.Limits.Exceeded.MaxFileSize";
|
|
}
|
|
}
|
|
return CL_VIRUS;
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
if (NULL != filename && map->name == NULL) {
|
|
// Use the provided name for the fmap name if one wasn't already set.
|
|
(void)cli_basename(filename, strlen(filename), &map->name, true /* posix_support_backslash_pathsep */);
|
|
}
|
|
|
|
return scan_common(
|
|
map,
|
|
filename,
|
|
verdict_out,
|
|
last_alert_out,
|
|
scanned_out,
|
|
engine,
|
|
scanoptions,
|
|
context,
|
|
hash_hint,
|
|
hash_out,
|
|
hash_alg,
|
|
file_type_hint,
|
|
file_type_out);
|
|
}
|
|
|
|
cl_error_t cli_magic_scan_file(const char *filename, cli_ctx *ctx, const char *original_name, uint32_t attributes)
|
|
{
|
|
int fd = -1;
|
|
cl_error_t ret = CL_EOPEN;
|
|
|
|
/* internal version of cl_scanfile with arec/mrec preserved */
|
|
fd = safe_open(filename, O_RDONLY | O_BINARY);
|
|
if (fd < 0) {
|
|
goto done;
|
|
}
|
|
|
|
ret = cli_magic_scan_desc(fd, filename, ctx, original_name, attributes);
|
|
|
|
done:
|
|
if (fd >= 0) {
|
|
close(fd);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
cl_error_t cl_scanfile(
|
|
const char *filename,
|
|
const char **virname,
|
|
unsigned long int *scanned,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions)
|
|
{
|
|
cl_error_t status;
|
|
uint64_t scanned_bytes;
|
|
cl_verdict_t verdict_out = CL_VERDICT_NOTHING_FOUND;
|
|
|
|
status = cl_scanfile_ex(
|
|
filename,
|
|
&verdict_out,
|
|
virname,
|
|
&scanned_bytes,
|
|
engine,
|
|
scanoptions,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL);
|
|
|
|
if (NULL != scanned) {
|
|
if (SIZEOF_LONG == 4 && scanned_bytes > UINT32_MAX) {
|
|
cli_warnmsg("cl_scanfile_callback: scanned_bytes exceeds UINT32_MAX, setting to UINT32_MAX\n");
|
|
*scanned = UINT32_MAX;
|
|
} else {
|
|
*scanned = (unsigned long int)scanned_bytes;
|
|
}
|
|
}
|
|
|
|
if (verdict_out == CL_VERDICT_STRONG_INDICATOR || verdict_out == CL_VERDICT_POTENTIALLY_UNWANTED) {
|
|
// Reporting "CL_VIRUS" is more important than reporting an error,
|
|
// because... unfortunately we can only do one with this API.
|
|
status = CL_VIRUS;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scanfile_callback(
|
|
const char *filename,
|
|
const char **virname,
|
|
unsigned long int *scanned,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context)
|
|
{
|
|
cl_error_t status;
|
|
uint64_t scanned_out;
|
|
cl_verdict_t verdict_out = CL_VERDICT_NOTHING_FOUND;
|
|
|
|
status = cl_scanfile_ex(
|
|
filename,
|
|
&verdict_out,
|
|
virname,
|
|
&scanned_out,
|
|
engine,
|
|
scanoptions,
|
|
context,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL);
|
|
|
|
if (NULL != scanned) {
|
|
if (SIZEOF_LONG == 4 && scanned_out > UINT32_MAX) {
|
|
cli_warnmsg("cl_scanfile_callback: scanned_out exceeds UINT32_MAX, setting to UINT32_MAX\n");
|
|
*scanned = UINT32_MAX;
|
|
} else {
|
|
*scanned = (unsigned long int)scanned_out;
|
|
}
|
|
}
|
|
|
|
if (verdict_out == CL_VERDICT_STRONG_INDICATOR || verdict_out == CL_VERDICT_POTENTIALLY_UNWANTED) {
|
|
// Reporting "CL_VIRUS" is more important than reporting an error,
|
|
// because... unfortunately we can only do one with this API.
|
|
status = CL_VIRUS;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
cl_error_t cl_scanfile_ex(
|
|
const char *filename,
|
|
cl_verdict_t *verdict_out,
|
|
const char **last_alert_out,
|
|
uint64_t *scanned_out,
|
|
const struct cl_engine *engine,
|
|
struct cl_scan_options *scanoptions,
|
|
void *context,
|
|
const char *hash_hint,
|
|
char **hash_out,
|
|
const char *hash_alg,
|
|
const char *file_type_hint,
|
|
char **file_type_out)
|
|
{
|
|
int fd;
|
|
cl_error_t ret;
|
|
const char *fname = cli_to_utf8_maybe_alloc(filename);
|
|
|
|
if (!fname)
|
|
return CL_EARG;
|
|
|
|
if ((fd = safe_open(fname, O_RDONLY | O_BINARY)) == -1) {
|
|
if (errno == EACCES) {
|
|
return CL_EACCES;
|
|
} else {
|
|
return CL_EOPEN;
|
|
}
|
|
}
|
|
|
|
if (fname != filename)
|
|
free((char *)fname);
|
|
|
|
ret = cl_scandesc_ex(
|
|
fd,
|
|
filename,
|
|
verdict_out,
|
|
last_alert_out,
|
|
scanned_out,
|
|
engine,
|
|
scanoptions,
|
|
context,
|
|
hash_hint,
|
|
hash_out,
|
|
hash_alg,
|
|
file_type_hint,
|
|
file_type_out);
|
|
|
|
close(fd);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
Local Variables:
|
|
c-basic-offset: 4
|
|
End:
|
|
*/
|