clamav/libclamav/bytecode_api.c

2344 lines
70 KiB
C
Raw Normal View History

2009-08-20 16:23:43 +03:00
/*
* ClamAV bytecode internal API
*
* Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2009-2013 Sourcefire, Inc.
2009-08-20 16:23:43 +03:00
*
2009-09-04 16:24:52 +03:00
* Authors: Török Edvin
*
2009-08-20 16:23:43 +03:00
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
2009-12-11 20:57:41 +02:00
#ifdef HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#ifdef HAVE_UNISTD_H
2009-09-07 18:01:43 +03:00
#include <unistd.h>
2009-12-11 20:57:41 +02:00
#endif
2009-09-04 17:29:13 +03:00
#include <stdlib.h>
#include <fcntl.h>
#include <errno.h>
2010-01-20 16:19:18 +02:00
#include <string.h>
2010-05-12 18:26:02 +03:00
#include <math.h>
2010-05-13 00:27:00 +03:00
#include <ctype.h>
2014-02-08 00:31:12 -05:00
Add CMake build tooling This patch adds experimental-quality CMake build tooling. The libmspack build required a modification to use "" instead of <> for header #includes. This will hopefully be included in the libmspack upstream project when adding CMake build tooling to libmspack. Removed use of libltdl when using CMake. Flex & Bison are now required to build. If -DMAINTAINER_MODE, then GPERF is also required, though it currently doesn't actually do anything. TODO! I found that the autotools build system was generating the lexer output but not actually compiling it, instead using previously generated (and manually renamed) lexer c source. As a consequence, changes to the .l and .y files weren't making it into the build. To resolve this, I removed generated flex/bison files and fixed the tooling to use the freshly generated files. Flex and bison are now required build tools. On Windows, this adds a dependency on the winflexbison package, which can be obtained using Chocolatey or may be manually installed. CMake tooling only has partial support for building with external LLVM library, and no support for the internal LLVM (to be removed in the future). I.e. The CMake build currently only supports the bytecode interpreter. Many files used include paths relative to the top source directory or relative to the current project, rather than relative to each build target. Modern CMake support requires including internal dependency headers the same way you would external dependency headers (albeit with "" instead of <>). This meant correcting all header includes to be relative to the build targets and not relative to the workspace. For example, ... ```c include "../libclamav/clamav.h" include "clamd/clamd_others.h" ``` ... becomes: ```c // libclamav include "clamav.h" // clamd include "clamd_others.h" ``` Fixes header name conflicts by renaming a few of the files. Converted the "shared" code into a static library, which depends on libclamav. 
The ironically named "shared" static library provides features common to the ClamAV apps which are not required in libclamav itself and are not intended for use by downstream projects. This change was required for correct modern CMake practices but was also required to use the automake "subdir-objects" option. This eliminates warnings when running autoreconf which, in the next version of autoconf & automake are likely to break the build. libclamav used to build in multiple stages where an earlier stage is a static library containing utils required by the "shared" code. Linking clamdscan and clamdtop with this libclamav utils static lib allowed these two apps to function without libclamav. While this is nice in theory, the practical gains are minimal and it complicates the build system. As such, the autotools and CMake tooling was simplified for improved maintainability and this feature was thrown out. clamdtop and clamdscan now require libclamav to function. Removed the nopthreads version of the autotools libclamav_internal_utils static library and added pthread linking to a couple apps that may have issues building on some platforms without it, with the intention of removing needless complexity from the source. Kept the regular version of libclamav_internal_utils.la though it is no longer used anywhere but in libclamav. Added an experimental doxygen build option which attempts to build clamav.h and libfreshclam doxygen html docs. The CMake build tooling also may build the example program(s), which isn't a feature in the Autotools build system. Changed C standard to C90+ due to inline linking issues with socket.h when linking libfreshclam.so on Linux. Generate common.rc for win32. Fix tabs/spaces in shared Makefile.am, and remove vestigial ifndef from misc.c. Add CMake files to the automake dist, so users can try the new CMake tooling w/out having to build from a git clone. 
clamonacc changes: - Renamed FANOTIFY macro to HAVE_SYS_FANOTIFY_H to better match other similar macros. - Added a new clamav-clamonacc.service systemd unit file, based on the work of ChadDevOps & Aaron Brighton. - Added missing clamonacc man page. Updates to clamdscan man page, add missing options. Remove vestigial CL_NOLIBCLAMAV definitions (all apps now use libclamav). Rename Windows mspack.dll to libmspack.dll so all ClamAV-built libraries have the lib-prefix with Visual Studio as with CMake.
2020-08-13 00:25:34 -07:00
#if HAVE_JSON
#include <json.h>
#endif
#if HAVE_BZLIB_H
#include <bzlib.h>
#endif
2014-07-01 19:38:01 -04:00
#include "clamav.h"
2009-09-04 17:29:13 +03:00
#include "clambc.h"
#include "bytecode.h"
2009-09-04 17:29:13 +03:00
#include "bytecode_priv.h"
2009-08-20 16:23:43 +03:00
#include "type_desc.h"
#include "bytecode_api.h"
2009-09-04 16:24:52 +03:00
#include "bytecode_api_impl.h"
2009-09-11 15:12:17 +03:00
#include "others.h"
2010-01-18 19:31:59 +02:00
#include "pe.h"
2010-08-02 15:42:58 +03:00
#include "pdf.h"
2010-01-20 17:16:27 +02:00
#include "disasm.h"
2010-03-22 13:00:23 +02:00
#include "scanners.h"
2010-03-31 10:53:11 +03:00
#include "jsparse/js-norm.h"
2010-05-12 18:26:02 +03:00
#include "hashtab.h"
#include "str.h"
#include "filetypes.h"
#include "lzma_iface.h"
2009-08-20 16:23:43 +03:00
/* Shorthand for the event recorder of the bytecode context; requires a
 * variable named `ctx` to be in scope wherever it is used. */
#define EV ctx->bc_events
/* Two-level stringification so that a macro argument such as __LINE__ is
 * expanded before it is turned into a string literal. */
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
/* Records an "API misuse" error-string event tagged with the source line on
 * which the macro is invoked. */
#define API_MISUSE() cli_event_error_str(EV, "API misuse @" TOSTRING(__LINE__))
/* Bundles an LZMA stream state with two 32-bit fields, `from` and `to`. */
struct bc_lzma {
struct CLI_LZMA stream; /* LZMA stream state */
int32_t from; /* NOTE(review): presumably the id of the input buffer - confirm */
int32_t to; /* NOTE(review): presumably the id of the output buffer - confirm */
};
/* Only compiled when HAVE_BZLIB_H is set. */
#if HAVE_BZLIB_H
/* Bundles a bzip2 stream state with two 32-bit fields, `from` and `to`. */
struct bc_bzip2 {
bz_stream stream; /* bzip2 stream state */
int32_t from; /* NOTE(review): presumably the id of the input buffer - confirm */
int32_t to; /* NOTE(review): presumably the id of the output buffer - confirm */
};
#endif
2009-09-04 16:24:52 +03:00
/* Self-test API: yields 0x12345678 for the exact argument pair
 * (0xf00dbeef, 0xbeeff00d) and 0x55 for anything else. `ctx` is unused. */
uint32_t cli_bcapi_test1(struct cli_bc_ctx *ctx, uint32_t a, uint32_t b)
{
    UNUSEDPARAM(ctx);

    if (a != 0xf00dbeef || b != 0xbeeff00d)
        return 0x55;
    return 0x12345678;
}
2009-09-04 17:29:13 +03:00
/* Self-test API: maps 0xf00d to 0xd00f; every other input gives 0x5555.
 * `ctx` is unused. */
uint32_t cli_bcapi_test2(struct cli_bc_ctx *ctx, uint32_t a)
{
    UNUSEDPARAM(ctx);

    if (a == 0xf00d)
        return 0xd00f;
    return 0x5555;
}
int32_t cli_bcapi_read(struct cli_bc_ctx *ctx, uint8_t *data, int32_t size)
2009-09-04 17:29:13 +03:00
{
size_t n;
if (!ctx->fmap) {
API_MISUSE();
return -1;
}
2010-03-19 13:20:59 +02:00
if (size < 0 || size > CLI_MAX_ALLOCATION) {
cli_warnmsg("bytecode: negative read size: %d\n", size);
API_MISUSE();
return -1;
2010-01-21 16:48:56 +02:00
}
n = fmap_readn(ctx->fmap, data, ctx->off, size);
if ((n == 0) || (n == (size_t)-1)) {
cli_dbgmsg("bcapi_read: fmap_readn failed (requested %d)\n", size);
cli_event_count(EV, BCEV_READ_ERR);
return (int32_t)n;
2010-03-24 17:55:04 +02:00
}
cli_event_int(EV, BCEV_OFFSET, ctx->off);
cli_event_fastdata(EV, BCEV_READ, data, size);
// cli_event_data(EV, BCEV_READ, data, n);
2010-01-21 16:48:56 +02:00
ctx->off += n;
return (int32_t)n;
2009-09-04 17:29:13 +03:00
}
int32_t cli_bcapi_seek(struct cli_bc_ctx *ctx, int32_t pos, uint32_t whence)
2009-09-04 17:29:13 +03:00
{
off_t off;
2010-03-24 17:55:04 +02:00
if (!ctx->fmap) {
cli_dbgmsg("bcapi_seek: no fmap\n");
API_MISUSE();
return -1;
2010-03-24 17:55:04 +02:00
}
2009-09-04 17:29:13 +03:00
switch (whence) {
case 0:
off = pos;
break;
case 1:
off = ctx->off + pos;
break;
case 2:
off = ctx->file_size + pos;
break;
default:
API_MISUSE();
cli_dbgmsg("bcapi_seek: invalid whence value\n");
return -1;
2009-09-04 17:29:13 +03:00
}
2010-03-24 17:55:04 +02:00
if (off < 0 || off > ctx->file_size) {
cli_dbgmsg("bcapi_seek: out of file: %lld (max %d)\n",
(long long)off, ctx->file_size);
return -1;
2010-03-24 17:55:04 +02:00
}
cli_event_int(EV, BCEV_OFFSET, off);
2009-09-04 17:29:13 +03:00
ctx->off = off;
return off;
}
2009-09-11 15:12:17 +03:00
2009-09-21 18:48:43 +03:00
uint32_t cli_bcapi_debug_print_str(struct cli_bc_ctx *ctx, const uint8_t *str, uint32_t len)
2009-09-11 15:12:17 +03:00
{
UNUSEDPARAM(len);
cli_event_fastdata(EV, BCEV_DBG_STR, str, strlen((const char *)str));
2009-09-11 15:12:17 +03:00
cli_dbgmsg("bytecode debug: %s\n", str);
return 0;
}
uint32_t cli_bcapi_debug_print_uint(struct cli_bc_ctx *ctx, uint32_t a)
2009-09-11 15:12:17 +03:00
{
cli_event_int(EV, BCEV_DBG_INT, a);
// cli_dbgmsg("bytecode debug: %d\n", a);
// return 0;
2010-05-13 20:02:02 +03:00
if (!cli_debug_flag)
return 0;
return cli_eprintf("%d", a);
2009-09-11 15:12:17 +03:00
}
2009-09-22 11:03:17 +03:00
/*TODO: compiler should make sure that only constants are passed here, and not
* pointers to arbitrary locations that may not be valid when bytecode finishes
* executing */
uint32_t cli_bcapi_setvirusname(struct cli_bc_ctx *ctx, const uint8_t *name, uint32_t len)
2009-09-22 11:03:17 +03:00
{
UNUSEDPARAM(len);
ctx->virname = (const char *)name;
2009-10-02 12:27:52 +03:00
return 0;
2009-09-22 11:03:17 +03:00
}
2009-11-06 16:34:46 +02:00
uint32_t cli_bcapi_disasm_x86(struct cli_bc_ctx *ctx, struct DISASM_RESULT *res, uint32_t len)
{
2010-01-20 17:16:27 +02:00
int n;
const unsigned char *buf;
const unsigned char *next;
UNUSEDPARAM(len);
if (!res || !ctx->fmap || (size_t)(ctx->off) >= ctx->fmap->len) {
API_MISUSE();
return -1;
}
2010-01-20 18:20:53 +02:00
/* 32 should be longest instr we support decoding.
* When we'll support mmx/sse instructions this should be updated! */
n = MIN(32, ctx->fmap->len - ctx->off);
2010-01-20 17:16:27 +02:00
buf = fmap_need_off_once(ctx->fmap, ctx->off, n);
2013-02-13 14:21:37 -05:00
if (buf)
next = cli_disasm_one(buf, n, res, 0);
else
next = NULL;
2010-03-24 17:55:04 +02:00
if (!next) {
cli_dbgmsg("bcapi_disasm: failed\n");
cli_event_count(EV, BCEV_DISASM_FAIL);
return -1;
2010-03-24 17:55:04 +02:00
}
2010-01-20 17:16:27 +02:00
return ctx->off + next - buf;
2009-11-06 16:34:46 +02:00
}
/* TODO: field in ctx, id of last bytecode that called magicscandesc, reset
* after hooks/other bytecodes are run. TODO: need a more generic solution
* to avoid uselessly recursing on bytecode-unpacked files, but also a way to
* override the limit if we need it in a special situation */
int32_t cli_bcapi_write(struct cli_bc_ctx *ctx, uint8_t *data, int32_t len)
2009-11-06 16:34:46 +02:00
{
char err[128];
size_t res;
cli_ctx *cctx = (cli_ctx *)ctx->ctx;
if (len < 0) {
cli_warnmsg("Bytecode API: called with negative length!\n");
API_MISUSE();
return -1;
}
2010-03-19 15:47:26 +02:00
if (!ctx->outfd) {
ctx->tempfile = cli_gentemp(cctx ? cctx->engine->tmpdir : NULL);
if (!ctx->tempfile) {
cli_dbgmsg("Bytecode API: Unable to allocate memory for tempfile\n");
cli_event_error_oom(EV, 0);
return -1;
}
ctx->outfd = open(ctx->tempfile, O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_BINARY, 0600);
if (ctx->outfd == -1) {
ctx->outfd = 0;
cli_warnmsg("Bytecode API: Can't create file %s: %s\n", ctx->tempfile, cli_strerror(errno, err, sizeof(err)));
cli_event_error_str(EV, "cli_bcapi_write: Can't create temporary file");
free(ctx->tempfile);
return -1;
}
cli_dbgmsg("bytecode opened new tempfile: %s\n", ctx->tempfile);
}
cli_event_fastdata(ctx->bc_events, BCEV_WRITE, data, len);
if (cli_checklimits("bytecode api", cctx, ctx->written + len, 0, 0))
return -1;
res = cli_writen(ctx->outfd, data, (size_t)len);
if (res > 0) ctx->written += res;
if (res == (size_t)-1) {
cli_warnmsg("Bytecode API: write failed: %s\n", cli_strerror(errno, err, sizeof(err)));
cli_event_error_str(EV, "cli_bcapi_write: write failed");
}
return (int32_t)res;
2009-11-06 16:34:46 +02:00
}
/* Installs the trace level and the four trace callbacks on a bytecode
 * context. The values are stored as given; no validation is performed. */
void cli_bytecode_context_set_trace(struct cli_bc_ctx *ctx, unsigned level,
                                    bc_dbg_callback_trace trace,
                                    bc_dbg_callback_trace_op trace_op,
                                    bc_dbg_callback_trace_val trace_val,
                                    bc_dbg_callback_trace_ptr trace_ptr)
{
    ctx->trace_level = level;

    ctx->trace     = trace;
    ctx->trace_op  = trace_op;
    ctx->trace_val = trace_val;
    ctx->trace_ptr = trace_ptr;
}
/* Trace hook: notes that execution entered a scope.
 *
 * Does nothing when tracing is off. When the scope pointer differs from the
 * stored one, the scope name ("?" if NULL) and id are updated and bit 0x80 is
 * set in the trace level; when only the id differs (and the level is at least
 * trace_scope) the id is updated and bit 0x40 is set. Always returns 0. */
uint32_t cli_bcapi_trace_scope(struct cli_bc_ctx *ctx, const uint8_t *scope, uint32_t scopeid)
{
    const char *name = (const char *)scope;

    if (LIKELY(!ctx->trace_level))
        return 0;

    if (ctx->scope != name) {
        ctx->scope   = name ? name : "?";
        ctx->scopeid = scopeid;
        /* temporarily increase level to print params */
        ctx->trace_level |= 0x80;
        return 0;
    }

    if (ctx->scopeid != scopeid && (ctx->trace_level >= trace_scope)) {
        ctx->scopeid = scopeid;
        /* temporarily increase level to print location */
        ctx->trace_level |= 0x40;
    }
    return 0;
}
/* Trace hook: remembers the directory string for later trace output (an
 * empty string is stored when `dir` is NULL). No-op when tracing is off.
 * `dummy` is ignored. Always returns 0. */
uint32_t cli_bcapi_trace_directory(struct cli_bc_ctx *ctx, const uint8_t *dir, uint32_t dummy)
{
    const char *path = (const char *)dir;

    UNUSEDPARAM(dummy);

    if (LIKELY(!ctx->trace_level))
        return 0;

    if (!path)
        path = "";
    ctx->directory = path;
    return 0;
}
/* Trace hook: records the current source file and line. Ignored when the
 * trace level is below trace_line. When either the file pointer or the line
 * changes, the column is reset to 0 and the file ("??" if NULL) and line are
 * stored. Always returns 0. */
uint32_t cli_bcapi_trace_source(struct cli_bc_ctx *ctx, const uint8_t *file, uint32_t line)
{
    const char *src = (const char *)file;

    if (LIKELY(ctx->trace_level < trace_line))
        return 0;

    /* Same location as before: nothing to update. */
    if (ctx->file == src && ctx->line == line)
        return 0;

    ctx->col  = 0;
    ctx->file = src ? src : "??";
    ctx->line = line;
    return 0;
}
/* Trace hook: reports an operation at column `col`.
 *
 * Ignored when the trace level is below trace_col. If either of the
 * temporary bits 0x80/0x40 is set in the level, a trace_func (0x80) or
 * trace_scope event is fired first and both bits are cleared. Then a
 * trace_col event (column changed) or trace_line event (same column) is
 * fired, and at level trace_op or above the trace_op callback receives the
 * operation name when both are non-NULL. Always returns 0. */
uint32_t cli_bcapi_trace_op(struct cli_bc_ctx *ctx, const uint8_t *op, uint32_t col)
{
    if (LIKELY(ctx->trace_level < trace_col))
        return 0;

    if (ctx->trace_level & 0xc0) {
        ctx->col = col;
        /* func/scope changed and they needed param/location event */
        if (ctx->trace_level & 0x80)
            ctx->trace(ctx, trace_func);
        else
            ctx->trace(ctx, trace_scope);
        ctx->trace_level &= ~0xc0;
    }

    /* Re-check: clearing the temporary bits may have lowered the level. */
    if (LIKELY(ctx->trace_level < trace_col))
        return 0;

    if (ctx->col == col) {
        ctx->trace(ctx, trace_line);
    } else {
        ctx->col = col;
        ctx->trace(ctx, trace_col);
    }

    if (LIKELY(ctx->trace_level < trace_op))
        return 0;

    if (op && ctx->trace_op)
        ctx->trace_op(ctx, (const char *)op);
    return 0;
}
2009-11-06 16:34:46 +02:00
/* Trace hook: reports a named value.
 *
 * Ignored when the trace level is below trace_val. While the 0x80 bit is set
 * in the level, the call is dropped if the level without that bit is below
 * trace_param; otherwise a trace_param event is fired first. Finally the
 * trace_val callback is invoked when both it and `name` are non-NULL.
 * Always returns 0. */
uint32_t cli_bcapi_trace_value(struct cli_bc_ctx *ctx, const uint8_t *name, uint32_t value)
{
    if (LIKELY(ctx->trace_level < trace_val))
        return 0;

    if (ctx->trace_level & 0x80) {
        const unsigned base_level = ctx->trace_level & 0x7f;

        if (base_level < trace_param)
            return 0;
        ctx->trace(ctx, trace_param);
    }

    if (name && ctx->trace_val)
        ctx->trace_val(ctx, (const char *)name, value);
    return 0;
}
/* Trace hook: reports a pointer value. Follows the same level gating as the
 * value hook: ignored below trace_val; with bit 0x80 set, dropped when the
 * level without that bit is below trace_param, otherwise preceded by a
 * trace_param event. The trace_ptr callback is invoked when it is non-NULL.
 * `dummy` is ignored. Always returns 0. */
uint32_t cli_bcapi_trace_ptr(struct cli_bc_ctx *ctx, const uint8_t *ptr, uint32_t dummy)
{
    UNUSEDPARAM(dummy);

    if (LIKELY(ctx->trace_level < trace_val))
        return 0;

    if (ctx->trace_level & 0x80) {
        const unsigned base_level = ctx->trace_level & 0x7f;

        if (base_level < trace_param)
            return 0;
        ctx->trace(ctx, trace_param);
    }

    if (ctx->trace_ptr)
        ctx->trace_ptr(ctx, ptr);
    return 0;
}
2010-01-18 19:31:59 +02:00
uint32_t cli_bcapi_pe_rawaddr(struct cli_bc_ctx *ctx, uint32_t rva)
2010-01-18 19:31:59 +02:00
{
uint32_t ret;
unsigned err = 0;
const struct cli_pe_hook_data *pe = ctx->hooks.pedata;
Code cleanup: Refactor to clean up formatting issues Refactored the clamscan code that determines 'what to scan' in order to clean up some very messy logic and also to get around a difference in how vscode and clang-format handle formatting #ifdef blocks in the middle of an else/if. In addition to refactoring, there is a slight behavior improvement. With this change, doing `clamscan blah -` will now scan `blah` and then also scan `stdin`. You can even do `clamscan - blah` to now scan `stdin` and then scan `blah`. Before, The `-` had to be the only "filename" argument in order to scan from stdin. In addition, added a bunch of extra empty lines or changing multi-line function calls to single-line function calls in order to get around a bug in clang-format with these two options do not playing nice together: - AlignConsecutiveAssignments: true - AlignAfterOpenBracket: true AlignAfterOpenBracket is not taking account the spaces inserted by AlignConsecutiveAssignments, so you end up with stuff like this: ```c bleeblah = 1; blah = function(arg1, arg2, arg3); // ^--- these args 4-left from where they should be. ``` VSCode, meanwhile, somehow fixes this whitespace issue so code that is correctly formatted by VSCode doesn't have this bug, meaning that: 1. The clang-format check in GH Actions fails. 2. We'd all have to stop using format-on-save in VSCode and accept the bug if we wanted those GH Actions tests to pass. Adding an empty line before variable assignments from multi-line function calls evades the buggy behavior. This commit should resolve the clang-format github action test failures, for now.
2022-03-10 20:55:13 -08:00
ret = cli_rawaddr(rva, ctx->sections, pe->nsections, &err,
ctx->file_size, pe->hdr_size);
if (err) {
cli_dbgmsg("bcapi_pe_rawaddr invalid rva: %u\n", rva);
return PE_INVALID_RVA;
}
return ret;
2010-01-18 19:31:59 +02:00
}
2010-01-20 16:19:18 +02:00
/* Finds the first occurrence of the `nlen`-byte sequence `needle` inside the
 * `hlen`-byte buffer `haystack`.
 *
 * Returns a pointer to the start of the match, or NULL when there is no
 * match, when either pointer is NULL, or when `nlen` is 0 (an empty needle is
 * treated as "nothing to search for"). */
static inline const char *cli_memmem(const char *haystack, unsigned hlen,
                                     const unsigned char *needle, unsigned nlen)
{
    const char *p;
    unsigned char c;

    if (!needle || !haystack) {
        return NULL;
    }
    /* Without this guard `nlen - 1` wraps around and memchr() is asked to
     * scan hlen + 1 bytes, i.e. one byte past the buffer. */
    if (nlen == 0) {
        return NULL;
    }

    c = *needle++;
    if (nlen == 1)
        return memchr(haystack, c, hlen);

    while (hlen >= nlen) {
        p = haystack;
        /* Only positions that leave room for the whole needle can match. */
        haystack = memchr(haystack, c, hlen - nlen + 1);
        if (!haystack)
            return NULL;
        hlen -= haystack + 1 - p;
        p = haystack + 1;
        /* First byte matched; compare the remaining nlen - 1 bytes. */
        if (!memcmp(p, needle, nlen - 1))
            return haystack;
        haystack = p;
    }
    return NULL;
}
int32_t cli_bcapi_file_find(struct cli_bc_ctx *ctx, const uint8_t *data, uint32_t len)
2010-05-12 18:26:02 +03:00
{
fmap_t *map = ctx->fmap;
if (!map || len <= 0) {
cli_dbgmsg("bcapi_file_find preconditions not met\n");
API_MISUSE();
return -1;
2010-05-12 18:26:02 +03:00
}
return cli_bcapi_file_find_limit(ctx, data, len, map->len);
}
int32_t cli_bcapi_file_find_limit(struct cli_bc_ctx *ctx, const uint8_t *data, uint32_t len, int32_t limit)
2010-01-20 16:19:18 +02:00
{
char buf[4096];
fmap_t *map = ctx->fmap;
uint32_t off = ctx->off;
size_t n;
size_t limit_sz;
2010-01-20 16:19:18 +02:00
if (!map || (len > sizeof(buf) / 4) || (len <= 0) || (limit <= 0)) {
cli_dbgmsg("bcapi_file_find_limit preconditions not met\n");
API_MISUSE();
return -1;
2010-03-24 17:55:04 +02:00
}
limit_sz = (size_t)limit;
cli_event_int(EV, BCEV_OFFSET, off);
cli_event_fastdata(EV, BCEV_FIND, data, len);
2010-01-20 16:19:18 +02:00
for (;;) {
const char *p;
size_t readlen = sizeof(buf);
if (off + readlen > limit_sz) {
if (off > limit_sz) {
return -1;
} else {
readlen = limit_sz - off;
}
}
n = fmap_readn(map, buf, off, readlen);
if ((n < len) || (n == (size_t)-1))
return -1;
p = cli_memmem(buf, n, data, len);
if (p)
return off + (p - buf);
off += n;
2010-01-20 16:19:18 +02:00
}
return -1;
}
int32_t cli_bcapi_file_byteat(struct cli_bc_ctx *ctx, uint32_t off)
2010-01-20 16:19:18 +02:00
{
unsigned char c;
2010-03-24 17:55:04 +02:00
if (!ctx->fmap) {
cli_dbgmsg("bcapi_file_byteat: no fmap\n");
return -1;
2010-03-24 17:55:04 +02:00
}
cli_event_int(EV, BCEV_OFFSET, off);
2010-03-24 17:55:04 +02:00
if (fmap_readn(ctx->fmap, &c, off, 1) != 1) {
cli_dbgmsg("bcapi_file_byteat: fmap_readn failed at %u\n", off);
return -1;
2010-03-24 17:55:04 +02:00
}
2010-01-20 16:19:18 +02:00
return c;
}
/* Allocates `size` bytes on behalf of bytecode.
 *
 * With USE_MPOOL the memory comes from the context's pool, which is created
 * on first use. Without it, an error message is logged and cli_malloc() is
 * used instead. Returns NULL (after recording an out-of-memory event) when
 * the pool cannot be created or the allocation fails. */
uint8_t *cli_bcapi_malloc(struct cli_bc_ctx *ctx, uint32_t size)
{
    void *mem;

#if USE_MPOOL
    if (!ctx->mpool)
        ctx->mpool = mpool_create();
    if (!ctx->mpool) {
        cli_dbgmsg("bytecode: mpool_create failed!\n");
        cli_event_error_oom(EV, 0);
        return NULL;
    }
    mem = MPOOL_MALLOC(ctx->mpool, size);
#else
    /* TODO: implement using a list of pointers we allocated! */
    cli_errmsg("cli_bcapi_malloc not implemented for systems without mmap yet!\n");
    mem = cli_malloc(size);
#endif

    if (mem == NULL)
        cli_event_error_oom(EV, size);
    return mem;
}
int32_t cli_bcapi_get_pe_section(struct cli_bc_ctx *ctx, struct cli_exe_section *section, uint32_t num)
2010-02-12 16:47:44 +02:00
{
if (num < ctx->hooks.pedata->nsections) {
memcpy(section, &ctx->sections[num], sizeof(struct cli_exe_section));
return 0;
2010-02-12 16:47:44 +02:00
}
return -1;
}
2010-03-19 13:20:59 +02:00
int32_t cli_bcapi_fill_buffer(struct cli_bc_ctx *ctx, uint8_t *buf,
uint32_t buflen, uint32_t filled,
uint32_t pos, uint32_t fill)
2010-03-19 13:20:59 +02:00
{
int32_t res, remaining, tofill;
UNUSEDPARAM(fill);
2010-03-19 15:47:26 +02:00
if (!buf || !buflen || buflen > CLI_MAX_ALLOCATION || filled > buflen) {
cli_dbgmsg("fill_buffer1\n");
API_MISUSE();
return -1;
2010-03-19 15:47:26 +02:00
}
if (ctx->off >= ctx->file_size) {
cli_dbgmsg("fill_buffer2\n");
API_MISUSE();
return 0;
2010-03-19 15:47:26 +02:00
}
2010-03-19 13:20:59 +02:00
remaining = filled - pos;
if (remaining) {
if (!CLI_ISCONTAINED(buf, buflen, buf + pos, remaining)) {
cli_dbgmsg("fill_buffer3\n");
API_MISUSE();
return -1;
}
memmove(buf, buf + pos, remaining);
2010-03-19 13:20:59 +02:00
}
tofill = buflen - remaining;
if (!CLI_ISCONTAINED(buf, buflen, buf + remaining, tofill)) {
cli_dbgmsg("fill_buffer4\n");
API_MISUSE();
return -1;
2010-03-19 15:47:26 +02:00
}
res = cli_bcapi_read(ctx, buf + remaining, tofill);
2010-03-24 17:55:04 +02:00
if (res <= 0) {
cli_dbgmsg("fill_buffer5\n");
API_MISUSE();
return res;
2010-03-24 17:55:04 +02:00
}
2010-03-19 13:20:59 +02:00
return remaining + res;
}
2010-03-19 15:47:26 +02:00
int32_t cli_bcapi_extract_new(struct cli_bc_ctx *ctx, int32_t id)
{
cli_ctx *cctx;
int res = -1;
cli_event_count(EV, BCEV_EXTRACTED);
2010-03-19 15:47:26 +02:00
cli_dbgmsg("previous tempfile had %u bytes\n", ctx->written);
if (!ctx->written)
return 0;
if (ctx->ctx && cli_updatelimits(ctx->ctx, ctx->written))
return -1;
2010-03-19 15:47:26 +02:00
ctx->written = 0;
2013-02-28 19:32:29 -05:00
if (lseek(ctx->outfd, 0, SEEK_SET) == -1) {
cli_dbgmsg("bytecode: call to lseek() has failed\n");
return CL_ESEEK;
}
2010-03-19 15:47:26 +02:00
cli_dbgmsg("bytecode: scanning extracted file %s\n", ctx->tempfile);
cctx = (cli_ctx *)ctx->ctx;
if (cctx) {
libclamav: Fix scan recursion tracking Scan recursion is the process of identifying files embedded in other files and then scanning them, recursively. Internally this process is more complex than it may sound because a file may have multiple layers of types before finding a new "file". At present we treat the recursion count in the scanning context as an index into both our fmap list AND our container list. These two lists are conceptually a part of the same thing and should be unified. But what's concerning is that the "recursion level" isn't actually incremented or decremented at the same time that we add a layer to the fmap or container lists but instead is more touchy-feely, increasing when we find a new "file". To account for this shadiness, the size of the fmap and container lists has always been a little longer than our "max scan recursion" limit so we don't accidentally overflow the fmap or container arrays (!). I've implemented a single recursion-stack as an array, similar to before, which includes a pointer to each fmap at each layer, along with the size and type. Push and pop functions add and remove layers whenever a new fmap is added. A boolean argument when pushing indicates if the new layer represents a new buffer or new file (descriptor). A new buffer will reset the "nested fmap level" (described below). This commit also provides a solution for an issue where we detect embedded files more than once during scan recursion. 
For illustration, imagine a tarball named foo.tar.gz with this structure: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | └── foo.tar | TAR | 1 | 0 | | ├── bar.zip | ZIP | 2 | 1 | | │   └── hola.txt | ASCII | 3 | 0 | | └── baz.exe | PE | 2 | 1 | But suppose baz.exe embeds a ZIP archive and a 7Z archive, like this: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | baz.exe | PE | 0 | 0 | | ├── sfx.zip | ZIP | 1 | 1 | | │   └── hello.txt | ASCII | 2 | 0 | | └── sfx.7z | 7Z | 1 | 1 | |    └── world.txt | ASCII | 2 | 0 | (A) If we scan for embedded files at any layer, we may detect: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | ├── foo.tar | TAR | 1 | 0 | | │ ├── bar.zip | ZIP | 2 | 1 | | │ │   └── hola.txt | ASCII | 3 | 0 | | │ ├── baz.exe | PE | 2 | 1 | | │ │ ├── sfx.zip | ZIP | 3 | 1 | | │ │ │   └── hello.txt | ASCII | 4 | 0 | | │ │ └── sfx.7z | 7Z | 3 | 1 | | │ │    └── world.txt | ASCII | 4 | 0 | | │ ├── sfx.zip | ZIP | 2 | 1 | | │ │   └── hello.txt | ASCII | 3 | 0 | | │ └── sfx.7z | 7Z | 2 | 1 | | │   └── world.txt | ASCII | 3 | 0 | | ├── sfx.zip | ZIP | 1 | 1 | | └── sfx.7z | 7Z | 1 | 1 | (A) is bad because it scans content more than once. Note that for the GZ layer, it may detect the ZIP and 7Z if the signature hits on the compressed data, which it might, though extracting the ZIP and 7Z will likely fail. The reason the above doesn't happen now is that we restrict embedded type scans for a bunch of archive formats to include GZ and TAR. 
(B) If we scan for embedded files at the foo.tar layer, we may detect: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | └── foo.tar | TAR | 1 | 0 | | ├── bar.zip | ZIP | 2 | 1 | | │   └── hola.txt | ASCII | 3 | 0 | | ├── baz.exe | PE | 2 | 1 | | ├── sfx.zip | ZIP | 2 | 1 | | │   └── hello.txt | ASCII | 3 | 0 | | └── sfx.7z | 7Z | 2 | 1 | |    └── world.txt | ASCII | 3 | 0 | (B) is almost right. But we can achieve it easily enough only scanning for embedded content in the current fmap when the "nested fmap level" is 0. The upside is that it should safely detect all embedded content, even if it may think the sfz.zip and sfx.7z are in foo.tar instead of in baz.exe. The biggest risk I can think of affects ZIPs. SFXZIP detection is identical to ZIP detection, which is why we don't allow SFXZIP to be detected if insize of a ZIP. If we only allow embedded type scanning at fmap-layer 0 in each buffer, this will fail to detect the embedded ZIP if the bar.exe was not compressed in foo.zip and if non-compressed files extracted from ZIPs aren't extracted as new buffers: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.zip | ZIP | 0 | 0 | | └── bar.exe | PE | 1 | 1 | | └── sfx.zip | ZIP | 2 | 2 | Provided that we ensure all files extracted from zips are scanned in new buffers, option (B) should be safe. (C) If we scan for embedded files at the baz.exe layer, we may detect: | description | type | rec level | nested fmap level | | ------------------------- | ----- | --------- | ----------------- | | foo.tar.gz | GZ | 0 | 0 | | └── foo.tar | TAR | 1 | 0 | | ├── bar.zip | ZIP | 2 | 1 | | │   └── hola.txt | ASCII | 3 | 0 | | └── baz.exe | PE | 2 | 1 | | ├── sfx.zip | ZIP | 3 | 1 | | │   └── hello.txt | ASCII | 4 | 0 | | └── sfx.7z | 7Z | 3 | 1 | |    └── world.txt | ASCII | 4 | 0 | (C) is right. 
But it's harder to achieve. For this example we can get it by restricting 7ZSFX and ZIPSFX detection only when scanning an executable. But that may mean losing detection of archives embedded elsewhere. And we'd have to identify allowable container types for each possible embedded type, which would be very difficult. So this commit aims to solve the issue the (B)-way. Note that in all situations, we still have to scan with file typing enabled to determine if we need to reassign the current file type, such as re-identifying a Bzip2 archive as a DMG that happens to be Bzip2- compressed. Detection of DMG and a handful of other types rely on finding data partway through or near the ned of a file before reassigning the entire file as the new type. Other fixes and considerations in this commit: - The utf16 HTML parser has weak error handling, particularly with respect to creating a nested fmap for scanning the ascii decoded file. This commit cleans up the error handling and wraps the nested scan with the recursion-stack push()/pop() for correct recursion tracking. Before this commit, each container layer had a flag to indicate if the container layer is valid. We need something similar so that the cli_recursion_stack_get_*() functions ignore normalized layers. Details... Imagine an LDB signature for HTML content that specifies a ZIP container. If the signature actually alerts on the normalized HTML and you don't ignore normalized layers for the container check, it will appear as though the alert is in an HTML container rather than a ZIP container. This commit accomplishes this with a boolean you set in the scan context before scanning a new layer. Then when the new fmap is created, it will use that flag to set similar flag for the layer. The context flag is reset those that anything after this doesn't have that flag. 
The flag allows the new recursion_stack_get() function to ignore normalized layers when iterating the stack to return a layer at a requested index, negative or positive. Scanning normalized extracted/normalized javascript and VBA should also use the 'layer is normalized' flag. - This commit also fixes Heuristic.Broken.Executable alert for ELF files to make sure that: A) these only alert if cli_append_virus() returns CL_VIRUS (aka it respects the FP check). B) all broken-executable alerts for ELF only happen if the SCAN_HEURISTIC_BROKEN option is enabled. - This commit also cleans up the error handling in cli_magic_scan_dir(). This was needed so we could correctly apply the layer-is-normalized-flag to all VBA macros extracted to a directory when scanning the directory. - Also fix an issue where exceeding scan maximums wouldn't cause embedded file detection scans to abort. Granted we don't actually want to abort if max filesize or max recursion depth are exceeded... only if max scansize, max files, and max scantime are exceeded. Add 'abort_scan' flag to scan context, to protect against depending on correct error propagation for fatal conditions. Instead, setting this flag in the scan context should guarantee that a fatal condition deep in scan recursion isn't lost which result in more stuff being scanned instead of aborting. This shouldn't be necessary, but some status codes like CL_ETIMEOUT never used to be fatal and it's easier to do this than to verify every parser only returns CL_ETIMEOUT and other "fatal status codes" in fatal conditions. - Remove duplicate is_tar() prototype from filestypes.c and include is_tar.h instead. - Presently we create the fmap hash when creating the fmap. This wastes a bit of CPU if the hash is never needed. Now that we're creating fmap's for all embedded files discovered with file type recognition scans, this is a much more frequent occurence and really slows things down. 
This commit fixes the issue by only creating fmap hashes as needed. This should not only resolve the perfomance impact of creating fmap's for all embedded files, but also should improve performance in general. - Add allmatch check to the zip parser after the central-header meta match. That way we don't multiple alerts with the same match except in allmatch mode. Clean up error handling in the zip parser a tiny bit. - Fixes to ensure that the scan limits such as scansize, filesize, recursion depth, # of embedded files, and scantime are always reported if AlertExceedsMax (--alert-exceeds-max) is enabled. - Fixed an issue where non-fatal alerts for exceeding scan maximums may mask signature matches later on. I changed it so these alerts use the "possibly unwanted" alert-type and thus only alert if no other alerts were found or if all-match or heuristic-precedence are enabled. - Added the "Heuristics.Limits.Exceeded.*" events to the JSON metadata when the --gen-json feature is enabled. These will show up once under "ParseErrors" the first time a limit is exceeded. In the present implementation, only one limits-exceeded events will be added, so as to prevent a malicious or malformed sample from filling the JSON buffer with millions of events and using a tonne of RAM.
2021-09-11 14:15:21 -07:00
res = cli_magic_scan_desc_type(ctx->outfd, ctx->tempfile, cctx, ctx->containertype, NULL);
if (res == CL_VIRUS) {
ctx->virname = cli_get_last_virus(cctx);
ctx->found = 1;
}
}
2010-03-19 15:47:26 +02:00
if ((cctx && cctx->engine->keeptmp) ||
(ftruncate(ctx->outfd, 0) == -1)) {
2010-03-19 15:47:26 +02:00
close(ctx->outfd);
if (!(cctx && cctx->engine->keeptmp) && ctx->tempfile)
cli_unlink(ctx->tempfile);
free(ctx->tempfile);
ctx->tempfile = NULL;
ctx->outfd = 0;
2010-03-19 15:47:26 +02:00
}
cli_dbgmsg("bytecode: extracting new file with id %u\n", id);
return res;
}
2010-03-19 22:20:55 +02:00
#define BUF 16
/*
 * Scan forward from ctx->off, BUF bytes at a time, for the first character
 * that is a digit in the requested radix, then parse the number starting
 * there and advance ctx->off past the characters consumed.
 *
 * @param ctx   bytecode context; ctx->fmap must be set
 * @param radix 10 or 16 (anything else is rejected)
 * @return the parsed value, or -1 if the radix is unsupported, there is no
 *         fmap, or no digit could be mapped.
 *
 * The mapped bytes are not NUL-terminated, so they are copied into a
 * terminated local buffer before strtoul() sees them; this bounds the parse
 * to the BUF bytes that were actually requested from the fmap. Digits beyond
 * that window are left for a subsequent call.
 */
int32_t cli_bcapi_read_number(struct cli_bc_ctx *ctx, uint32_t radix)
{
    unsigned i;
    const char *p;
    int32_t result;

    if ((radix != 10 && radix != 16) || !ctx->fmap)
        return -1;
    cli_event_int(EV, BCEV_OFFSET, ctx->off);
    while ((p = fmap_need_off_once(ctx->fmap, ctx->off, BUF))) {
        for (i = 0; i < BUF; i++) {
            if ((p[i] >= '0' && p[i] <= '9') || (radix == 16 && ((p[i] >= 'a' && p[i] <= 'f') || (p[i] >= 'A' && p[i] <= 'F')))) {
                char digits[BUF + 1];
                char *endptr;

                /* re-request BUF bytes starting at the first digit */
                p = fmap_need_ptr_once(ctx->fmap, p + i, BUF);
                if (!p)
                    return -1;
                memcpy(digits, p, BUF);
                digits[BUF] = '\0';
                result      = (int32_t)strtoul(digits, &endptr, radix);
                ctx->off += i + (endptr - digits);
                return result;
            }
        }
        /* no digit in this window, move on to the next one */
        ctx->off += BUF;
    }
    return -1;
}
int32_t cli_bcapi_hashset_new(struct cli_bc_ctx *ctx)
2010-03-20 21:18:54 +02:00
{
unsigned n = ctx->nhashsets + 1;
struct cli_hashset *s = cli_realloc(ctx->hashsets, sizeof(*ctx->hashsets) * n);
if (!s) {
cli_event_error_oom(EV, 0);
return -1;
}
ctx->hashsets = s;
2010-03-21 15:10:49 +02:00
ctx->nhashsets = n;
s = &s[n - 1];
2010-03-21 15:10:49 +02:00
cli_hashset_init(s, 16, 80);
return n - 1;
2010-03-21 15:10:49 +02:00
}
/*
 * Look up a hashset by id.
 * Returns NULL (after flagging API_MISUSE) when the id is negative, out of
 * range, or no hashsets have been allocated.
 */
static struct cli_hashset *get_hashset(struct cli_bc_ctx *ctx, int32_t id)
{
    if (ctx->hashsets && id >= 0 && (unsigned int)id < ctx->nhashsets)
        return &ctx->hashsets[id];

    API_MISUSE();
    return NULL;
}
2010-03-21 15:10:49 +02:00
int32_t cli_bcapi_hashset_add(struct cli_bc_ctx *ctx, int32_t id, uint32_t key)
2010-03-20 21:18:54 +02:00
{
2010-03-21 15:10:49 +02:00
struct cli_hashset *s = get_hashset(ctx, id);
if (!s)
return -1;
2010-03-21 15:10:49 +02:00
return cli_hashset_addkey(s, key);
2010-03-20 21:18:54 +02:00
}
2010-03-21 15:10:49 +02:00
int32_t cli_bcapi_hashset_remove(struct cli_bc_ctx *ctx, int32_t id, uint32_t key)
2010-03-20 21:18:54 +02:00
{
2010-03-21 15:10:49 +02:00
struct cli_hashset *s = get_hashset(ctx, id);
if (!s)
return -1;
2010-03-24 17:59:41 +02:00
return cli_hashset_removekey(s, key);
2010-03-20 21:18:54 +02:00
}
2010-03-21 15:10:49 +02:00
int32_t cli_bcapi_hashset_contains(struct cli_bc_ctx *ctx, int32_t id, uint32_t key)
2010-03-20 21:18:54 +02:00
{
2010-03-21 15:10:49 +02:00
struct cli_hashset *s = get_hashset(ctx, id);
if (!s)
return -1;
2010-03-21 15:10:49 +02:00
return cli_hashset_contains(s, key);
}
int32_t cli_bcapi_hashset_empty(struct cli_bc_ctx *ctx, int32_t id)
{
struct cli_hashset *s = get_hashset(ctx, id);
2012-07-10 13:17:45 -04:00
return s ? !s->count : 1;
2010-03-20 21:18:54 +02:00
}
2010-03-21 15:10:49 +02:00
int32_t cli_bcapi_hashset_done(struct cli_bc_ctx *ctx, int32_t id)
2010-03-20 21:18:54 +02:00
{
2010-03-24 17:07:14 +02:00
struct cli_hashset *s = get_hashset(ctx, id);
if (!s)
return -1;
2010-03-24 17:07:14 +02:00
cli_hashset_destroy(s);
if ((unsigned int)id == ctx->nhashsets - 1) {
ctx->nhashsets--;
if (!ctx->nhashsets) {
free(ctx->hashsets);
ctx->hashsets = NULL;
} else {
s = cli_realloc(ctx->hashsets, ctx->nhashsets * sizeof(*s));
if (s)
ctx->hashsets = s;
}
2010-05-12 18:26:02 +03:00
}
2010-03-24 17:07:14 +02:00
return 0;
2010-03-20 21:18:54 +02:00
}
2010-03-21 12:56:05 +02:00
int32_t cli_bcapi_buffer_pipe_new(struct cli_bc_ctx *ctx, uint32_t size)
2010-03-20 21:18:54 +02:00
{
2010-03-21 12:56:05 +02:00
unsigned char *data;
struct bc_buffer *b;
unsigned n = ctx->nbuffers + 1;
2010-03-24 17:37:23 +02:00
data = cli_calloc(1, size);
2010-03-21 12:56:05 +02:00
if (!data)
return -1;
b = cli_realloc(ctx->buffers, sizeof(*ctx->buffers) * n);
2010-03-21 12:56:05 +02:00
if (!b) {
free(data);
return -1;
2010-03-21 12:56:05 +02:00
}
ctx->buffers = b;
2010-03-21 12:56:05 +02:00
ctx->nbuffers = n;
b = &b[n - 1];
2010-03-21 12:56:05 +02:00
b->data = data;
b->size = size;
2010-03-21 12:56:05 +02:00
b->write_cursor = b->read_cursor = 0;
return n - 1;
2010-03-21 12:56:05 +02:00
}
int32_t cli_bcapi_buffer_pipe_new_fromfile(struct cli_bc_ctx *ctx, uint32_t at)
2010-03-21 12:56:05 +02:00
{
struct bc_buffer *b;
unsigned n = ctx->nbuffers + 1;
if (at >= ctx->file_size)
return -1;
2010-03-21 12:56:05 +02:00
b = cli_realloc(ctx->buffers, sizeof(*ctx->buffers) * n);
2010-03-21 12:56:05 +02:00
if (!b) {
return -1;
2010-03-21 12:56:05 +02:00
}
ctx->buffers = b;
2010-03-21 12:56:05 +02:00
ctx->nbuffers = n;
b = &b[n - 1];
2010-03-21 12:56:05 +02:00
/* NULL data means read from file at pos read_cursor */
b->data = NULL;
b->size = 0;
b->read_cursor = at;
2010-03-21 12:56:05 +02:00
b->write_cursor = 0;
return n - 1;
2010-03-21 12:56:05 +02:00
}
/*
 * Look up a buffer pipe by id.
 * Returns NULL (and logs a debug message) when no pipes exist or the id is
 * negative or out of range.
 */
static struct bc_buffer *get_buffer(struct cli_bc_ctx *ctx, int32_t id)
{
    if (!ctx->buffers || id < 0 || (unsigned int)id >= ctx->nbuffers) {
        /* id is signed: print it as such so negative ids are recognisable */
        cli_dbgmsg("bytecode api: invalid buffer id %d\n", (int)id);
        return NULL;
    }
    return &ctx->buffers[id];
}
uint32_t cli_bcapi_buffer_pipe_read_avail(struct cli_bc_ctx *ctx, int32_t id)
2010-03-21 12:56:05 +02:00
{
struct bc_buffer *b = get_buffer(ctx, id);
if (!b)
return 0;
2010-03-21 12:56:05 +02:00
if (b->data) {
if (b->write_cursor <= b->read_cursor)
return 0;
return b->write_cursor - b->read_cursor;
2010-03-21 12:56:05 +02:00
}
if (!ctx->fmap || b->read_cursor >= ctx->file_size)
return 0;
if (b->read_cursor + BUFSIZ <= ctx->file_size)
return BUFSIZ;
return ctx->file_size - b->read_cursor;
2010-03-21 12:56:05 +02:00
}
const uint8_t *cli_bcapi_buffer_pipe_read_get(struct cli_bc_ctx *ctx, int32_t id, uint32_t size)
2010-03-21 12:56:05 +02:00
{
struct bc_buffer *b = get_buffer(ctx, id);
if (!b || size > cli_bcapi_buffer_pipe_read_avail(ctx, id) || !size)
return NULL;
2010-03-21 12:56:05 +02:00
if (b->data)
return b->data + b->read_cursor;
2010-03-21 12:56:05 +02:00
return fmap_need_off(ctx->fmap, b->read_cursor, size);
}
int32_t cli_bcapi_buffer_pipe_read_stopped(struct cli_bc_ctx *ctx, int32_t id, uint32_t amount)
2010-03-21 12:56:05 +02:00
{
struct bc_buffer *b = get_buffer(ctx, id);
if (!b)
return -1;
2010-03-21 12:56:05 +02:00
if (b->data) {
if (b->write_cursor <= b->read_cursor)
return -1;
if (b->read_cursor + amount > b->write_cursor)
b->read_cursor = b->write_cursor;
else
b->read_cursor += amount;
if (b->read_cursor >= b->size &&
b->write_cursor >= b->size)
b->read_cursor = b->write_cursor = 0;
return 0;
2010-03-21 12:56:05 +02:00
}
b->read_cursor += amount;
return 0;
}
uint32_t cli_bcapi_buffer_pipe_write_avail(struct cli_bc_ctx *ctx, int32_t id)
{
struct bc_buffer *b = get_buffer(ctx, id);
if (!b)
return 0;
2010-03-21 12:56:05 +02:00
if (!b->data)
return 0;
2010-03-21 12:56:05 +02:00
if (b->write_cursor >= b->size)
return 0;
2010-03-21 12:56:05 +02:00
return b->size - b->write_cursor;
}
uint8_t *cli_bcapi_buffer_pipe_write_get(struct cli_bc_ctx *ctx, int32_t id, uint32_t size)
2010-03-21 12:56:05 +02:00
{
struct bc_buffer *b = get_buffer(ctx, id);
if (!b || size > cli_bcapi_buffer_pipe_write_avail(ctx, id) || !size)
return NULL;
2010-03-21 12:56:05 +02:00
if (!b->data)
return NULL;
2010-03-21 12:56:05 +02:00
return b->data + b->write_cursor;
}
int32_t cli_bcapi_buffer_pipe_write_stopped(struct cli_bc_ctx *ctx, int32_t id, uint32_t size)
2010-03-21 12:56:05 +02:00
{
struct bc_buffer *b = get_buffer(ctx, id);
if (!b || !b->data)
return -1;
2010-03-21 12:56:05 +02:00
if (b->write_cursor + size >= b->size)
b->write_cursor = b->size;
2010-03-21 12:56:05 +02:00
else
b->write_cursor += size;
2010-03-21 12:56:05 +02:00
return 0;
}
int32_t cli_bcapi_buffer_pipe_done(struct cli_bc_ctx *ctx, int32_t id)
2010-03-21 12:56:05 +02:00
{
2010-03-24 17:07:14 +02:00
struct bc_buffer *b = get_buffer(ctx, id);
if (!b)
return -1;
2010-03-24 17:07:14 +02:00
free(b->data);
b->data = NULL;
return -0;
2010-03-21 12:56:05 +02:00
}
int32_t cli_bcapi_inflate_init(struct cli_bc_ctx *ctx, int32_t from, int32_t to, int32_t windowBits)
{
int ret;
z_stream stream;
struct bc_inflate *b;
unsigned n = ctx->ninflates + 1;
2010-03-21 15:10:49 +02:00
if (!get_buffer(ctx, from) || !get_buffer(ctx, to)) {
cli_dbgmsg("bytecode api: inflate_init: invalid buffers!\n");
return -1;
2010-03-21 15:10:49 +02:00
}
b = cli_realloc(ctx->inflates, sizeof(*ctx->inflates) * n);
if (!b) {
return -1;
}
ctx->inflates = b;
ctx->ninflates = n;
b = &b[n - 1];
b->from = from;
b->to = to;
b->needSync = 0;
memset(&b->stream, 0, sizeof(stream));
ret = inflateInit2(&b->stream, windowBits);
2010-03-21 12:56:05 +02:00
switch (ret) {
case Z_MEM_ERROR:
cli_dbgmsg("bytecode api: inflateInit2: out of memory!\n");
return -1;
case Z_VERSION_ERROR:
cli_dbgmsg("bytecode api: inflateinit2: zlib version error!\n");
return -1;
case Z_STREAM_ERROR:
cli_dbgmsg("bytecode api: inflateinit2: zlib stream error!\n");
return -1;
case Z_OK:
break;
default:
cli_dbgmsg("bytecode api: inflateInit2: unknown error %d\n", ret);
return -1;
2010-03-21 12:56:05 +02:00
}
return n - 1;
2010-03-20 21:18:54 +02:00
}
2010-03-21 12:56:05 +02:00
static struct bc_inflate *get_inflate(struct cli_bc_ctx *ctx, int32_t id)
2010-03-20 21:18:54 +02:00
{
if (id < 0 || (unsigned int)id >= ctx->ninflates || !ctx->inflates)
return NULL;
2010-03-21 12:56:05 +02:00
return &ctx->inflates[id];
}
int32_t cli_bcapi_inflate_process(struct cli_bc_ctx *ctx, int32_t id)
2010-03-21 12:56:05 +02:00
{
int ret;
unsigned avail_in_orig, avail_out_orig;
struct bc_inflate *b = get_inflate(ctx, id);
2010-03-21 15:10:49 +02:00
if (!b || b->from == -1 || b->to == -1)
return -1;
2010-03-21 12:56:05 +02:00
b->stream.avail_in = avail_in_orig =
cli_bcapi_buffer_pipe_read_avail(ctx, b->from);
2010-03-21 12:56:05 +02:00
b->stream.next_in = (void *)cli_bcapi_buffer_pipe_read_get(ctx, b->from,
b->stream.avail_in);
2010-03-21 12:56:05 +02:00
b->stream.avail_out = avail_out_orig =
cli_bcapi_buffer_pipe_write_avail(ctx, b->to);
2010-03-21 12:56:05 +02:00
b->stream.next_out = cli_bcapi_buffer_pipe_write_get(ctx, b->to,
b->stream.avail_out);
2010-03-21 12:56:05 +02:00
2010-03-22 13:41:18 +02:00
if (!b->stream.avail_in || !b->stream.avail_out || !b->stream.next_in || !b->stream.next_out)
return -1;
2010-03-21 12:56:05 +02:00
/* try hard to extract data, skipping over corrupted data */
do {
if (!b->needSync) {
ret = inflate(&b->stream, Z_NO_FLUSH);
if (ret == Z_DATA_ERROR) {
cli_dbgmsg("bytecode api: inflate at %lu: %s, trying to recover\n", b->stream.total_in,
b->stream.msg);
b->needSync = 1;
}
}
if (b->needSync) {
ret = inflateSync(&b->stream);
if (ret == Z_OK) {
cli_dbgmsg("bytecode api: successfully recovered inflate stream\n");
b->needSync = 0;
continue;
}
}
break;
2010-03-21 12:56:05 +02:00
} while (1);
cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
cli_bcapi_buffer_pipe_write_stopped(ctx, b->to, avail_out_orig - b->stream.avail_out);
if (ret == Z_MEM_ERROR) {
cli_dbgmsg("bytecode api: out of memory!\n");
cli_bcapi_inflate_done(ctx, id);
return ret;
2010-03-21 12:56:05 +02:00
}
if (ret == Z_STREAM_END) {
cli_bcapi_inflate_done(ctx, id);
2010-03-21 12:56:05 +02:00
}
if (ret == Z_BUF_ERROR) {
cli_dbgmsg("bytecode api: buffer error!\n");
2010-03-21 12:56:05 +02:00
}
return ret;
2010-03-20 21:18:54 +02:00
}
int32_t cli_bcapi_inflate_done(struct cli_bc_ctx *ctx, int32_t id)
2010-03-20 21:18:54 +02:00
{
2010-03-21 12:56:05 +02:00
int ret;
struct bc_inflate *b = get_inflate(ctx, id);
if (!b || b->from == -1 || b->to == -1)
return -1;
2010-03-21 12:56:05 +02:00
ret = inflateEnd(&b->stream);
if (ret == Z_STREAM_ERROR)
cli_dbgmsg("bytecode api: inflateEnd: %s\n", b->stream.msg);
2010-03-21 12:56:05 +02:00
b->from = b->to = -1;
return ret;
2010-03-20 21:18:54 +02:00
}
2010-03-21 12:56:05 +02:00
int32_t cli_bcapi_lzma_init(struct cli_bc_ctx *ctx, int32_t from, int32_t to)
{
int ret;
struct bc_lzma *b;
unsigned n = ctx->nlzmas + 1;
unsigned avail_in_orig;
if (!get_buffer(ctx, from) || !get_buffer(ctx, to)) {
cli_dbgmsg("bytecode api: lzma_init: invalid buffers!\n");
return -1;
}
avail_in_orig = cli_bcapi_buffer_pipe_read_avail(ctx, from);
if (avail_in_orig < LZMA_PROPS_SIZE + 8) {
cli_dbgmsg("bytecode api: lzma_init: not enough bytes in pipe to read LZMA header!\n");
return -1;
}
b = cli_realloc(ctx->lzmas, sizeof(*ctx->lzmas) * n);
if (!b) {
return -1;
}
ctx->lzmas = b;
ctx->nlzmas = n;
b = &b[n - 1];
b->from = from;
b->to = to;
memset(&b->stream, 0, sizeof(b->stream));
b->stream.avail_in = avail_in_orig;
Code cleanup: Refactor to clean up formatting issues Refactored the clamscan code that determines 'what to scan' in order to clean up some very messy logic and also to get around a difference in how vscode and clang-format handle formatting #ifdef blocks in the middle of an else/if. In addition to refactoring, there is a slight behavior improvement. With this change, doing `clamscan blah -` will now scan `blah` and then also scan `stdin`. You can even do `clamscan - blah` to now scan `stdin` and then scan `blah`. Before, The `-` had to be the only "filename" argument in order to scan from stdin. In addition, added a bunch of extra empty lines or changing multi-line function calls to single-line function calls in order to get around a bug in clang-format with these two options do not playing nice together: - AlignConsecutiveAssignments: true - AlignAfterOpenBracket: true AlignAfterOpenBracket is not taking account the spaces inserted by AlignConsecutiveAssignments, so you end up with stuff like this: ```c bleeblah = 1; blah = function(arg1, arg2, arg3); // ^--- these args 4-left from where they should be. ``` VSCode, meanwhile, somehow fixes this whitespace issue so code that is correctly formatted by VSCode doesn't have this bug, meaning that: 1. The clang-format check in GH Actions fails. 2. We'd all have to stop using format-on-save in VSCode and accept the bug if we wanted those GH Actions tests to pass. Adding an empty line before variable assignments from multi-line function calls evades the buggy behavior. This commit should resolve the clang-format github action test failures, for now.
2022-03-10 20:55:13 -08:00
b->stream.next_in = (void *)cli_bcapi_buffer_pipe_read_get(ctx, b->from,
b->stream.avail_in);
if ((ret = cli_LzmaInit(&b->stream, 0)) != LZMA_RESULT_OK) {
cli_dbgmsg("bytecode api: LzmaInit: Failed to initialize LZMA decompressor: %d!\n", ret);
cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
return ret;
}
cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
return n - 1;
}
/*
 * Look up an LZMA decoder by id; NULL when the id is negative, out of range,
 * or no decoders have been allocated.
 */
static struct bc_lzma *get_lzma(struct cli_bc_ctx *ctx, int32_t id)
{
    if (ctx->lzmas && id >= 0 && (unsigned int)id < ctx->nlzmas)
        return &ctx->lzmas[id];
    return NULL;
}
int32_t cli_bcapi_lzma_process(struct cli_bc_ctx *ctx, int32_t id)
{
int ret;
unsigned avail_in_orig, avail_out_orig;
struct bc_lzma *b = get_lzma(ctx, id);
if (!b || b->from == -1 || b->to == -1)
return -1;
b->stream.avail_in = avail_in_orig =
cli_bcapi_buffer_pipe_read_avail(ctx, b->from);
b->stream.next_in = (void *)cli_bcapi_buffer_pipe_read_get(ctx, b->from,
b->stream.avail_in);
b->stream.avail_out = avail_out_orig =
cli_bcapi_buffer_pipe_write_avail(ctx, b->to);
b->stream.next_out = (uint8_t *)cli_bcapi_buffer_pipe_write_get(ctx, b->to,
b->stream.avail_out);
if (!b->stream.avail_in || !b->stream.avail_out || !b->stream.next_in || !b->stream.next_out)
return -1;
ret = cli_LzmaDecode(&b->stream);
cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail_in_orig - b->stream.avail_in);
cli_bcapi_buffer_pipe_write_stopped(ctx, b->to, avail_out_orig - b->stream.avail_out);
if (ret != LZMA_RESULT_OK && ret != LZMA_STREAM_END) {
cli_dbgmsg("bytecode api: LzmaDecode: Error %d while decoding\n", ret);
cli_bcapi_lzma_done(ctx, id);
}
return ret;
}
/*
 * Close LZMA decoder `id`: shut the stream down and mark the slot closed
 * (from == to == -1).
 *
 * @return 0 on success, -1 if the id is invalid or the decoder is already
 *         closed.
 */
int32_t cli_bcapi_lzma_done(struct cli_bc_ctx *ctx, int32_t id)
{
    struct bc_lzma *dec = get_lzma(ctx, id);

    if (dec == NULL || dec->from == -1 || dec->to == -1)
        return -1;

    cli_LzmaShutdown(&dec->stream);
    dec->from = -1;
    dec->to   = -1;
    return 0;
}
/*
 * Register a bzip2 decompressor that reads from pipe `from` and writes to
 * pipe `to`.
 *
 * @return the id of the new decompressor, or -1 if a pipe id is invalid, the
 *         array cannot be grown, BZ2_bzDecompressInit() fails, or bzip2
 *         support is not compiled in.
 *
 * When BZ2_bzDecompressInit() fails the slot has already been appended to
 * ctx->bzip2s; it is marked closed (from == to == -1) so that
 * cli_bcapi_bzip2_process()/cli_bcapi_bzip2_done() reject its id instead of
 * operating on a stream that was never initialised.
 */
int32_t cli_bcapi_bzip2_init(struct cli_bc_ctx *ctx, int32_t from, int32_t to)
{
#if HAVE_BZLIB_H
    int ret;
    struct bc_bzip2 *b;
    unsigned n = ctx->nbzip2s + 1;

    if (!get_buffer(ctx, from) || !get_buffer(ctx, to)) {
        cli_dbgmsg("bytecode api: bzip2_init: invalid buffers!\n");
        return -1;
    }
    b = cli_realloc(ctx->bzip2s, sizeof(*ctx->bzip2s) * n);
    if (!b) {
        return -1;
    }
    ctx->bzip2s  = b;
    ctx->nbzip2s = n;
    b            = &b[n - 1];

    b->from = from;
    b->to   = to;
    memset(&b->stream, 0, sizeof(b->stream));
    ret = BZ2_bzDecompressInit(&b->stream, 0, 0);
    switch (ret) {
        case BZ_OK:
            return n - 1;
        case BZ_CONFIG_ERROR:
            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: Library has been mis-compiled!\n");
            break;
        case BZ_PARAM_ERROR:
            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: Invalid arguments!\n");
            break;
        case BZ_MEM_ERROR:
            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: Insufficient memory available!\n");
            break;
        default:
            cli_dbgmsg("bytecode api: BZ2_bzDecompressInit: unknown error %d\n", ret);
            break;
    }
    /* initialisation failed: make the slot unusable */
    b->from = b->to = -1;
    return -1;
#else
    return -1;
#endif
}
#if HAVE_BZLIB_H
/*
 * Look up a bzip2 decompressor by id; NULL when the id is negative, out of
 * range, or no decompressors have been allocated.
 */
static struct bc_bzip2 *get_bzip2(struct cli_bc_ctx *ctx, int32_t id)
{
    if (ctx->bzip2s && id >= 0 && (unsigned int)id < ctx->nbzip2s)
        return &ctx->bzip2s[id];
    return NULL;
}
#endif
/*
 * Run one bzip2 decompression step for decompressor `id`, moving data from
 * its input pipe to its output pipe and advancing both pipes by the amounts
 * used.
 *
 * @return -1 if the id is invalid, the decompressor is closed, either pipe
 *         has nothing to offer, or bzip2 support is not compiled in;
 *         otherwise the return code of BZ2_bzDecompress().
 */
int32_t cli_bcapi_bzip2_process(struct cli_bc_ctx *ctx, int32_t id)
{
#if HAVE_BZLIB_H
    int rc;
    unsigned in_before, out_before;
    struct bc_bzip2 *bz = get_bzip2(ctx, id);

    if (bz == NULL || bz->from == -1 || bz->to == -1)
        return -1;

    in_before           = cli_bcapi_buffer_pipe_read_avail(ctx, bz->from);
    bz->stream.avail_in = in_before;
    bz->stream.next_in  = (void *)cli_bcapi_buffer_pipe_read_get(ctx, bz->from, bz->stream.avail_in);

    out_before           = cli_bcapi_buffer_pipe_write_avail(ctx, bz->to);
    bz->stream.avail_out = out_before;
    bz->stream.next_out  = (char *)cli_bcapi_buffer_pipe_write_get(ctx, bz->to, bz->stream.avail_out);

    if (!bz->stream.avail_in || !bz->stream.avail_out || !bz->stream.next_in || !bz->stream.next_out)
        return -1;

    rc = BZ2_bzDecompress(&bz->stream);
    cli_bcapi_buffer_pipe_read_stopped(ctx, bz->from, in_before - bz->stream.avail_in);
    cli_bcapi_buffer_pipe_write_stopped(ctx, bz->to, out_before - bz->stream.avail_out);

    /* report an error only when the call failed and produced no output */
    if (rc != BZ_OK && bz->stream.avail_out == out_before) {
        cli_errmsg("cli_bcapi_bzip2_process: failed to decompress data\n");
    }
    return rc;
#else
    return -1;
#endif
}
/*
 * Close bzip2 decompressor `id`: end the stream and mark the slot closed
 * (from == to == -1).
 *
 * @return 0 on success; -1 if the id is invalid, the decompressor is already
 *         closed, or bzip2 support is not compiled in.
 */
int32_t cli_bcapi_bzip2_done(struct cli_bc_ctx *ctx, int32_t id)
{
#if HAVE_BZLIB_H
    struct bc_bzip2 *bz = get_bzip2(ctx, id);

    if (bz == NULL || bz->from == -1 || bz->to == -1)
        return -1;

    BZ2_bzDecompressEnd(&bz->stream);
    bz->from = -1;
    bz->to   = -1;
    return 0;
#else
    return -1;
#endif
}
/*
 * Emit a warning for a bytecode runtime error.
 * `id` packs the location: the low 8 bits are the column, the remaining
 * upper bits the line.
 *
 * @return always 0.
 */
int32_t cli_bcapi_bytecode_rt_error(struct cli_bc_ctx *ctx, int32_t id)
{
    int32_t col  = id & 0xff;
    int32_t line = id >> 8;

    UNUSEDPARAM(ctx);
    cli_warnmsg("Bytecode runtime error at line %u, col %u\n", line, col);
    return 0;
}
2010-03-31 10:53:11 +03:00
int32_t cli_bcapi_jsnorm_init(struct cli_bc_ctx *ctx, int32_t from)
{
struct parser_state *state;
struct bc_jsnorm *b;
unsigned n = ctx->njsnorms + 1;
2010-03-31 10:53:11 +03:00
if (!get_buffer(ctx, from)) {
cli_dbgmsg("bytecode api: jsnorm_init: invalid buffers!\n");
return -1;
2010-03-31 10:53:11 +03:00
}
state = cli_js_init();
if (!state)
return -1;
b = cli_realloc(ctx->jsnorms, sizeof(*ctx->jsnorms) * n);
2010-03-31 10:53:11 +03:00
if (!b) {
cli_js_destroy(state);
return -1;
2010-03-31 10:53:11 +03:00
}
ctx->jsnorms = b;
2010-03-31 10:53:11 +03:00
ctx->njsnorms = n;
b = &b[n - 1];
b->from = from;
b->state = state;
2010-03-31 10:53:11 +03:00
if (!ctx->jsnormdir) {
cli_ctx *cctx = (cli_ctx *)ctx->ctx;
Improve tmp sub-directory names At present many parsers create tmp subdirectories to store extracted files. For parsers like the vba parser, this is required as the directory is later scanned. For other parsers, these subdirectories are probably not helpful now that we provide recursive sub-dirs when --leave-temps is enabled. It's not quite as simple as removing the extra subdirectories, however. Certain parsers, like autoit, don't create very unique filenames and would result in file name collisions when --leave-temps is not enabled. The best thing to do would be to make sure each parser uses unique filenames and doesn't rely on cli_magic_scan_dir() to scan extracted content before removing the extra subdirectory. In the meantime, this commit gives the extra subdirectories meaningful names to improve readability. This commit also: - Provides the 'bmp' prefix for extracted PE icons. - Removes empty tmp subdirs when extracting rtf files, to eliminate clutter. - The PDF parser sometimes creates tmp files when decompressing streams before it knows if there is actually any content to decompress. This resulted in a large number of empty files. While it would be best to avoid creating empty files in the first place, that's not quite as as it sounds. This commit does the next best thing and deletes the tmp files if nothing was actually extracted, even if --leave-temps is enabled. - Removes the "scantemp" prefix for unnamed fmaps scanned with cli_magic_scan(). The 5-character hashes given to tmp files with prefixes resulted in occasional file name collisions when extracting certain file types with thousands of embedded files. - The VBA and TAR parsers mistakenly used NAME_MAX instead of PATH_MAX, resulting in truncated file paths and failed extraction when --leave-temps is enabled and a lot of recursion is in play. This commit switches them from NAME_MAX to PATH_MAX.
2020-03-27 16:06:22 -04:00
ctx->jsnormdir = cli_gentemp_with_prefix(cctx ? cctx->engine->tmpdir : NULL, "normalized-js");
if (ctx->jsnormdir) {
if (mkdir(ctx->jsnormdir, 0700)) {
cli_dbgmsg("js: can't create temp dir %s\n", ctx->jsnormdir);
free(ctx->jsnormdir);
return CL_ETMPDIR;
}
}
2010-03-31 10:53:11 +03:00
}
return n - 1;
2010-03-31 10:53:11 +03:00
}
/*
 * Look up a JS normaliser by id; NULL when the id is negative, out of range,
 * or no normalisers have been allocated.
 */
static struct bc_jsnorm *get_jsnorm(struct cli_bc_ctx *ctx, int32_t id)
{
    if (ctx->jsnorms && id >= 0 && (unsigned int)id < ctx->njsnorms)
        return &ctx->jsnorms[id];
    return NULL;
}
int32_t cli_bcapi_jsnorm_process(struct cli_bc_ctx *ctx, int32_t id)
{
unsigned avail;
2012-01-05 14:16:09 +02:00
const unsigned char *in;
cli_ctx *cctx = ctx->ctx;
2010-03-31 10:53:11 +03:00
struct bc_jsnorm *b = get_jsnorm(ctx, id);
if (!b || b->from == -1 || !b->state)
return -1;
2010-03-31 10:53:11 +03:00
avail = cli_bcapi_buffer_pipe_read_avail(ctx, b->from);
in = cli_bcapi_buffer_pipe_read_get(ctx, b->from, avail);
2010-03-31 10:53:11 +03:00
if (!avail || !in)
return -1;
2010-03-31 10:53:11 +03:00
if (cctx && cli_checklimits("bytecode js api", cctx, ctx->jsnormwritten + avail, 0, 0))
return -1;
2010-03-31 10:53:11 +03:00
cli_bcapi_buffer_pipe_read_stopped(ctx, b->from, avail);
cli_js_process_buffer(b->state, (char *)in, avail);
2010-03-31 10:53:11 +03:00
return 0;
}
int32_t cli_bcapi_jsnorm_done(struct cli_bc_ctx *ctx, int32_t id)
2010-03-31 10:53:11 +03:00
{
struct bc_jsnorm *b = get_jsnorm(ctx, id);
if (!b || b->from == -1)
return -1;
2010-03-31 10:53:11 +03:00
if (ctx->ctx && cli_updatelimits(ctx->ctx, ctx->jsnormwritten))
return -1;
2010-03-31 10:53:11 +03:00
ctx->jsnormwritten = 0;
cli_js_parse_done(b->state);
cli_js_output(b->state, ctx->jsnormdir);
cli_js_destroy(b->state);
b->from = -1;
return 0;
}
2010-05-12 18:26:02 +03:00
/*
 * Bias `a` by half a unit away from zero so that a following truncating cast
 * to an integer type rounds to nearest.
 */
static inline double myround(double a)
{
    return (a < 0) ? a - 0.5 : a + 0.5;
}
2010-05-12 18:26:02 +03:00
/*
 * Fixed-point base-2 logarithm: returns round(2^26 * log2(a / b)).
 *
 * @return 0x7fffffff when b is 0; otherwise the scaled logarithm, saturated
 *         to the int32_t range.
 *
 * Saturation matters at both ends: a == 0 yields -infinity, and
 * a = 0xffffffff with b = 1 rounds to 2^31, one past INT32_MAX. Converting
 * either value to int32_t directly is undefined behaviour.
 */
int32_t cli_bcapi_ilog2(struct cli_bc_ctx *ctx, uint32_t a, uint32_t b)
{
    double f;
    UNUSEDPARAM(ctx);

    if (!b)
        return 0x7fffffff;
    /* log(a/b) is -32..32, so 2^26*32=2^31 covers the entire range of int32 */
    f = myround((1 << 26) * log((double)a / b) / log(2));
    if (f >= 2147483647.0)
        return 0x7fffffff;
    if (f <= -2147483648.0)
        return -0x7fffffff - 1;
    return (int32_t)f;
}
/*
 * Integer power helper: returns round(c * a^b).
 *
 * @return 0x7fffffff when a is 0 and b is negative (division by zero),
 *         otherwise the rounded product converted to int32_t.
 */
int32_t cli_bcapi_ipow(struct cli_bc_ctx *ctx, int32_t a, int32_t b, int32_t c)
{
    UNUSEDPARAM(ctx);

    if (a == 0 && b < 0)
        return 0x7fffffff;

    return (int32_t)myround(c * pow(a, b));
}
2010-05-14 10:35:16 +03:00
/*
 * Integer exponential helper: returns round(c * e^(a / b)).
 *
 * @return 0x7fffffff when b is 0, otherwise the rounded value converted to
 *         uint32_t.
 */
uint32_t cli_bcapi_iexp(struct cli_bc_ctx *ctx, int32_t a, int32_t b, int32_t c)
{
    double scaled;
    UNUSEDPARAM(ctx);

    if (b == 0)
        return 0x7fffffff;

    scaled = c * exp((double)a / b);
    return (uint32_t)myround(scaled);
}
/*
 * Integer sine helper: returns round(c * sin(a / b)).
 *
 * @return 0x7fffffff when b is 0, otherwise the rounded value converted to
 *         int32_t.
 */
int32_t cli_bcapi_isin(struct cli_bc_ctx *ctx, int32_t a, int32_t b, int32_t c)
{
    double scaled;
    UNUSEDPARAM(ctx);

    if (b == 0)
        return 0x7fffffff;

    scaled = c * sin((double)a / b);
    return (int32_t)myround(scaled);
}
/*
 * Integer cosine helper: returns round(c * cos(a / b)).
 *
 * @return 0x7fffffff when b is 0, otherwise the rounded value converted to
 *         int32_t.
 */
int32_t cli_bcapi_icos(struct cli_bc_ctx *ctx, int32_t a, int32_t b, int32_t c)
{
    double scaled;
    UNUSEDPARAM(ctx);

    if (b == 0)
        return 0x7fffffff;

    scaled = c * cos((double)a / b);
    return (int32_t)myround(scaled);
}
int32_t cli_bcapi_memstr(struct cli_bc_ctx *ctx, const uint8_t *h, int32_t hs,
const uint8_t *n, int32_t ns)
2010-05-12 18:26:02 +03:00
{
const uint8_t *s;
if (!h || !n || hs < 0 || ns < 0) {
API_MISUSE();
return -1;
}
cli_event_fastdata(EV, BCEV_MEM_1, h, hs);
cli_event_fastdata(EV, BCEV_MEM_2, n, ns);
s = (const uint8_t *)cli_memstr((const char *)h, hs, (const char *)n, ns);
2010-05-12 18:26:02 +03:00
if (!s)
return -1;
2010-05-12 18:26:02 +03:00
return s - h;
}
/*
 * Convert two hexadecimal characters (`ah` high nibble, `bh` low nibble) to
 * the byte value they encode.
 *
 * @return the byte value in the range 0..255, or -1 if cli_hex2str_to()
 *         rejects the input.
 *
 * The decoded byte is returned through unsigned char: with a plain (possibly
 * signed) char, values >= 0x80 would be sign-extended to negative numbers,
 * and "ff" would be indistinguishable from the -1 error return.
 */
int32_t cli_bcapi_hex2ui(struct cli_bc_ctx *ctx, uint32_t ah, uint32_t bh)
{
    char result = 0;
    unsigned char in[2];
    UNUSEDPARAM(ctx);

    in[0] = ah;
    in[1] = bh;
    if (cli_hex2str_to((const char *)in, &result, 2) == -1)
        return -1;
    return (unsigned char)result;
}
/*
 * Parse a non-negative decimal integer from the first `len` bytes of `str`.
 * Leading whitespace and a single leading '+' are skipped; parsing stops at
 * the first non-digit or at the end of the buffer.
 *
 * @return the parsed value, or -1 when `str` is NULL, `len` is not positive,
 *         the input holds no digits, the number is negative, or the value
 *         does not fit in int32_t.
 *
 * Every dereference is preceded by a bounds check, and the digit loop
 * advances the cursor (without that it never terminates).
 */
int32_t cli_bcapi_atoi(struct cli_bc_ctx *ctx, const uint8_t *str, int32_t len)
{
    int32_t number = 0;
    const uint8_t *end;
    UNUSEDPARAM(ctx);

    if (!str || len <= 0)
        return -1;
    end = str + len;

    while (str < end && isspace(*str))
        str++;
    if (str == end)
        return -1; /* all spaces */
    if (*str == '+')
        str++;
    if (str == end)
        return -1; /* all spaces and + */
    if (*str == '-')
        return -1; /* only positive numbers */
    if (!isdigit(*str))
        return -1;
    while (str < end && isdigit(*str)) {
        int32_t digit = *str - '0';

        /* number * 10 + digit must stay within int32_t */
        if (number > (0x7fffffff - digit) / 10)
            return -1;
        number = number * 10 + digit;
        str++;
    }
    return number;
}
uint32_t cli_bcapi_debug_print_str_start(struct cli_bc_ctx *ctx, const uint8_t *s, uint32_t len)
2010-05-12 18:26:02 +03:00
{
UNUSEDPARAM(ctx);
2010-05-12 18:26:02 +03:00
if (!s || len <= 0)
return -1;
cli_event_fastdata(EV, BCEV_DBG_STR, s, len);
2010-05-13 00:27:00 +03:00
cli_dbgmsg("bytecode debug: %.*s", len, s);
2010-05-12 18:26:02 +03:00
return 0;
}
uint32_t cli_bcapi_debug_print_str_nonl(struct cli_bc_ctx *ctx, const uint8_t *s, uint32_t len)
2010-05-12 18:26:02 +03:00
{
UNUSEDPARAM(ctx);
2010-05-12 18:26:02 +03:00
if (!s || len <= 0)
return -1;
2010-05-13 20:02:02 +03:00
if (!cli_debug_flag)
return 0;
2010-05-13 00:27:00 +03:00
return fwrite(s, 1, len, stderr);
2010-05-12 18:26:02 +03:00
}
/*
 * Compute the Shannon entropy (bits per byte) of the first `len` bytes of
 * `s`, returned as a fixed-point value scaled by 2^26.
 *
 * @return the scaled entropy truncated to uint32_t, or (uint32_t)-1 when `s`
 *         is NULL or `len` is not positive.
 */
uint32_t cli_bcapi_entropy_buffer(struct cli_bc_ctx *ctx, uint8_t *s, int32_t len)
{
    uint32_t histogram[256];
    unsigned int idx;
    double bits = 0;
    double ln2  = log(2);

    UNUSEDPARAM(ctx);

    if (s == NULL || len <= 0)
        return -1;

    /* count how often each byte value occurs */
    memset(histogram, 0, sizeof(histogram));
    for (idx = 0; idx < (unsigned int)len; idx++)
        histogram[s[idx]]++;

    /* sum -p*log2(p) over the byte values that are present */
    for (idx = 0; idx < 256; idx++) {
        if (histogram[idx]) {
            double p = (double)histogram[idx] / len;
            bits += -p * log(p) / ln2;
        }
    }

    bits *= 1 << 26;
    return (uint32_t)bits;
}
int32_t cli_bcapi_map_new(struct cli_bc_ctx *ctx, int32_t keysize, int32_t valuesize)
{
unsigned n = ctx->nmaps + 1;
2010-05-13 23:35:47 +03:00
struct cli_map *s;
if (!keysize)
return -1;
s = cli_realloc(ctx->maps, sizeof(*ctx->maps) * n);
2010-05-13 23:35:47 +03:00
if (!s)
return -1;
ctx->maps = s;
2010-05-12 18:26:02 +03:00
ctx->nmaps = n;
s = &s[n - 1];
2010-05-12 18:26:02 +03:00
cli_map_init(s, keysize, valuesize, 16);
return n - 1;
2010-05-12 18:26:02 +03:00
}
/*
 * Look up a map by id; NULL when the id is negative, out of range, or no
 * maps have been allocated.
 */
static struct cli_map *get_hashtab(struct cli_bc_ctx *ctx, int32_t id)
{
    if (ctx->maps && id >= 0 && (unsigned int)id < ctx->nmaps)
        return &ctx->maps[id];
    return NULL;
}
/*
 * Add `key` (keysize bytes) to map `id`.
 * @return -1 for an invalid id, otherwise the result of cli_map_addkey().
 */
int32_t cli_bcapi_map_addkey(struct cli_bc_ctx *ctx, const uint8_t *key, int32_t keysize, int32_t id)
{
    struct cli_map *map = get_hashtab(ctx, id);

    if (map == NULL)
        return -1;
    return cli_map_addkey(map, key, keysize);
}
/*
 * Store `value` (valuesize bytes) in map `id` via cli_map_setvalue().
 * @return -1 for an invalid id, otherwise the result of cli_map_setvalue().
 */
int32_t cli_bcapi_map_setvalue(struct cli_bc_ctx *ctx, const uint8_t *value, int32_t valuesize, int32_t id)
{
    struct cli_map *map = get_hashtab(ctx, id);

    if (map == NULL)
        return -1;
    return cli_map_setvalue(map, value, valuesize);
}
/*
 * Remove `key` (keysize bytes) from map `id`.
 * @return -1 for an invalid id, otherwise the result of cli_map_removekey().
 */
int32_t cli_bcapi_map_remove(struct cli_bc_ctx *ctx, const uint8_t *key, int32_t keysize, int32_t id)
{
    struct cli_map *map = get_hashtab(ctx, id);

    if (map == NULL)
        return -1;
    return cli_map_removekey(map, key, keysize);
}
/*
 * Look up `key` (keysize bytes) in map `id`.
 * @return -1 for an invalid id, otherwise the result of cli_map_find().
 */
int32_t cli_bcapi_map_find(struct cli_bc_ctx *ctx, const uint8_t *key, int32_t keysize, int32_t id)
{
    struct cli_map *map = get_hashtab(ctx, id);

    if (map == NULL)
        return -1;
    return cli_map_find(map, key, keysize);
}
/*
 * Query the value size of map `id`.
 * @return -1 for an invalid id, otherwise the result of
 *         cli_map_getvalue_size().
 */
int32_t cli_bcapi_map_getvaluesize(struct cli_bc_ctx *ctx, int32_t id)
{
    struct cli_map *map = get_hashtab(ctx, id);

    if (map == NULL)
        return -1;
    return cli_map_getvalue_size(map);
}
/*
 * Fetch the value pointer of map `id`, but only if the caller's expected
 * `valuesize` equals what cli_map_getvalue_size() reports.
 *
 * @return the result of cli_map_getvalue(), or NULL for an invalid id or a
 *         size mismatch.
 */
uint8_t *cli_bcapi_map_getvalue(struct cli_bc_ctx *ctx, int32_t id, int32_t valuesize)
{
    struct cli_map *map = get_hashtab(ctx, id);

    if (map == NULL || cli_map_getvalue_size(map) != valuesize)
        return NULL;
    return cli_map_getvalue(map);
}
int32_t cli_bcapi_map_done(struct cli_bc_ctx *ctx, int32_t id)
2010-05-12 18:26:02 +03:00
{
struct cli_map *s = get_hashtab(ctx, id);
if (!s)
return -1;
2010-05-12 18:26:02 +03:00
cli_map_delete(s);
if ((unsigned int)id == ctx->nmaps - 1) {
ctx->nmaps--;
if (!ctx->nmaps) {
free(ctx->maps);
ctx->maps = NULL;
} else {
s = cli_realloc(ctx->maps, ctx->nmaps * (sizeof(*s)));
if (s)
ctx->maps = s;
}
2010-05-12 18:26:02 +03:00
}
return 0;
}
/* Return the value reported by cl_retflevel(); the context is not used. */
uint32_t cli_bcapi_engine_functionality_level(struct cli_bc_ctx *ctx)
{
    uint32_t flevel;

    UNUSEDPARAM(ctx);
    flevel = cl_retflevel();
    return flevel;
}
uint32_t cli_bcapi_engine_dconf_level(struct cli_bc_ctx *ctx)
{
UNUSEDPARAM(ctx);
2010-05-12 18:26:02 +03:00
return CL_FLEVEL_DCONF;
}
/*
 * Translate the scan context's option groups (general, parse, heuristic,
 * mail, dev) into the single CL_SCAN_* bitmask exposed to bytecode.
 *
 * @return the combined bitmask; CL_SCAN_RAW is always set. When the bytecode
 *         context has no scan context (or the scan context has no options)
 *         only CL_SCAN_RAW is returned instead of dereferencing a NULL
 *         pointer.
 */
uint32_t cli_bcapi_engine_scan_options(struct cli_bc_ctx *ctx)
{
    cli_ctx *cctx    = (cli_ctx *)ctx->ctx;
    uint32_t options = CL_SCAN_RAW;

    /* other API calls in this file treat ctx->ctx as optional; do the same */
    if (cctx == NULL || cctx->options == NULL)
        return options;

    /* general options */
    if (cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES)
        options |= CL_SCAN_ALLMATCHES;
    if (cctx->options->general & CL_SCAN_GENERAL_HEURISTICS)
        options |= CL_SCAN_ALGORITHMIC;
    if (cctx->options->general & CL_SCAN_GENERAL_COLLECT_METADATA)
        options |= CL_SCAN_FILE_PROPERTIES;
    if (cctx->options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE)
        options |= CL_SCAN_HEURISTIC_PRECEDENCE;

    /* parser options */
    if (cctx->options->parse & CL_SCAN_PARSE_ARCHIVE)
        options |= CL_SCAN_ARCHIVE;
    if (cctx->options->parse & CL_SCAN_PARSE_ELF)
        options |= CL_SCAN_ELF;
    if (cctx->options->parse & CL_SCAN_PARSE_PDF)
        options |= CL_SCAN_PDF;
    if (cctx->options->parse & CL_SCAN_PARSE_SWF)
        options |= CL_SCAN_SWF;
    if (cctx->options->parse & CL_SCAN_PARSE_HWP3)
        options |= CL_SCAN_HWP3;
    if (cctx->options->parse & CL_SCAN_PARSE_XMLDOCS)
        options |= CL_SCAN_XMLDOCS;
    if (cctx->options->parse & CL_SCAN_PARSE_MAIL)
        options |= CL_SCAN_MAIL;
    if (cctx->options->parse & CL_SCAN_PARSE_OLE2)
        options |= CL_SCAN_OLE2;
    if (cctx->options->parse & CL_SCAN_PARSE_HTML)
        options |= CL_SCAN_HTML;
    if (cctx->options->parse & CL_SCAN_PARSE_PE)
        options |= CL_SCAN_PE;
    /* CL_SCAN_MAILURL is not reported: deprecated circa 2009 */

    /* heuristic options */
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_BROKEN)
        options |= CL_SCAN_BLOCKBROKEN;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX)
        options |= CL_SCAN_BLOCKMAX;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH)
        options |= CL_SCAN_PHISHING_BLOCKSSL;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_PHISHING_CLOAK)
        options |= CL_SCAN_PHISHING_BLOCKCLOAK;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_MACROS)
        options |= CL_SCAN_BLOCKMACROS;
    /* either encrypted-archive or encrypted-document maps to one flag */
    if ((cctx->options->heuristic & CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) ||
        (cctx->options->heuristic & CL_SCAN_HEURISTIC_ENCRYPTED_DOC))
        options |= CL_SCAN_BLOCKENCRYPTED;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_PARTITION_INTXN)
        options |= CL_SCAN_PARTITION_INTXN;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED)
        options |= CL_SCAN_STRUCTURED;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL)
        options |= CL_SCAN_STRUCTURED_SSN_NORMAL;
    if (cctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED)
        options |= CL_SCAN_STRUCTURED_SSN_STRIPPED;

    /* mail options */
    if (cctx->options->mail & CL_SCAN_MAIL_PARTIAL_MESSAGE)
        options |= CL_SCAN_PARTIAL_MESSAGE;

    /* developer options */
    if (cctx->options->dev & CL_SCAN_DEV_COLLECT_SHA)
        options |= CL_SCAN_INTERNAL_COLLECT_SHA;
    if (cctx->options->dev & CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO)
        options |= CL_SCAN_PERFORMANCE_INFO;

    return options;
}
/* Return non-zero when `name` starts with the category keyword `category`. */
static int scan_option_in_category(const char *name, const char *category)
{
    return 0 == strncmp(name, category, strlen(category));
}

/* Return non-zero when `keyword` occurs anywhere inside `name`. */
static int scan_option_mentions(const char *name, const char *keyword)
{
    return NULL != strstr(name, keyword);
}

/**
 * Query a single scan option by name.
 *
 * The name is matched case-insensitively. It must start with a category
 * keyword ("general", "parse", "heuristic", "mail" or "dev") and contain one
 * of that category's option keywords, e.g. "general allmatch",
 * "parse pdf", "heuristic structured ssn normal", "mail partial message",
 * "dev collect sha".
 *
 * @param ctx         Bytecode context; ctx->ctx supplies the scan options.
 * @param option_name Option name bytes (need not be NUL-terminated).
 * @param name_len    Number of bytes available at option_name.
 * @return 1 when the named option is enabled; 0 when it is disabled, unknown,
 *         or when the arguments are invalid / memory could not be allocated.
 */
uint32_t cli_bcapi_engine_scan_options_ex(struct cli_bc_ctx *ctx, const uint8_t *option_name, uint32_t name_len)
{
    uint32_t i            = 0;
    uint32_t result       = 0;
    char *option_name_l   = NULL;
    const char *opt       = NULL;
    cli_ctx *cctx         = NULL;

    /* name_len == UINT32_MAX would make "name_len + 1" wrap to zero. */
    if (ctx == NULL || option_name == NULL || name_len == 0 || name_len == UINT32_MAX) {
        cli_warnmsg("engine_scan_options_ex: Invalid arguments!\n");
        goto done;
    }

    cctx = (cli_ctx *)ctx->ctx;
    if (cctx == NULL || cctx->options == NULL) {
        cli_warnmsg("engine_scan_options_ex: Invalid arguments!\n");
        goto done;
    }

    option_name_l = malloc((size_t)name_len + 1);
    if (NULL == option_name_l) {
        cli_warnmsg("Failed to allocate memory for option name.\n");
        goto done;
    }

    /* Build a lower-case, NUL-terminated copy so plain string functions apply. */
    for (i = 0; i < name_len; i++) {
        option_name_l[i] = (char)tolower(option_name[i]);
    }
    option_name_l[name_len] = '\0';
    opt                     = option_name_l;

    if (scan_option_in_category(opt, "general")) {
        if (scan_option_mentions(opt, "allmatch")) {
            result = (cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES) ? 1 : 0;
        } else if (scan_option_mentions(opt, "collect metadata")) {
            result = (cctx->options->general & CL_SCAN_GENERAL_COLLECT_METADATA) ? 1 : 0;
        } else if (scan_option_mentions(opt, "heuristics")) {
            result = (cctx->options->general & CL_SCAN_GENERAL_HEURISTICS) ? 1 : 0;
        } else if (scan_option_mentions(opt, "precedence")) {
            result = (cctx->options->general & CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE) ? 1 : 0;
        }
        /* else unknown option */
    } else if (scan_option_in_category(opt, "parse")) {
        if (scan_option_mentions(opt, "archive")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_ARCHIVE) ? 1 : 0;
        } else if (scan_option_mentions(opt, "elf")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_ELF) ? 1 : 0;
        } else if (scan_option_mentions(opt, "pdf")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_PDF) ? 1 : 0;
        } else if (scan_option_mentions(opt, "swf")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_SWF) ? 1 : 0;
        } else if (scan_option_mentions(opt, "hwp3")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_HWP3) ? 1 : 0;
        } else if (scan_option_mentions(opt, "xmldocs")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_XMLDOCS) ? 1 : 0;
        } else if (scan_option_mentions(opt, "mail")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_MAIL) ? 1 : 0;
        } else if (scan_option_mentions(opt, "ole2")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_OLE2) ? 1 : 0;
        } else if (scan_option_mentions(opt, "html")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_HTML) ? 1 : 0;
        } else if (scan_option_mentions(opt, "pe")) {
            result = (cctx->options->parse & CL_SCAN_PARSE_PE) ? 1 : 0;
        }
        /* else unknown option */
    } else if (scan_option_in_category(opt, "heuristic")) {
        if (scan_option_mentions(opt, "broken")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_BROKEN) ? 1 : 0;
        } else if (scan_option_mentions(opt, "exceeds max")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_EXCEEDS_MAX) ? 1 : 0;
        } else if (scan_option_mentions(opt, "phishing ssl mismatch")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH) ? 1 : 0;
        } else if (scan_option_mentions(opt, "phishing cloak")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_PHISHING_CLOAK) ? 1 : 0;
        } else if (scan_option_mentions(opt, "macros")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_MACROS) ? 1 : 0;
        } else if (scan_option_mentions(opt, "encrypted archive")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE) ? 1 : 0;
        } else if (scan_option_mentions(opt, "encrypted doc")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_ENCRYPTED_DOC) ? 1 : 0;
        } else if (scan_option_mentions(opt, "partition intersection")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_PARTITION_INTXN) ? 1 : 0;
        } else if (scan_option_mentions(opt, "structured ssn normal")) {
            /* The longer "structured ssn ..." names must be tested before the
             * bare "structured" keyword, which is a substring of both. */
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL) ? 1 : 0;
        } else if (scan_option_mentions(opt, "structured ssn stripped")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED) ? 1 : 0;
        } else if (scan_option_mentions(opt, "structured")) {
            result = (cctx->options->heuristic & CL_SCAN_HEURISTIC_STRUCTURED) ? 1 : 0;
        }
        /* else unknown option */
    } else if (scan_option_in_category(opt, "mail")) {
        if (scan_option_mentions(opt, "partial message")) {
            result = (cctx->options->mail & CL_SCAN_MAIL_PARTIAL_MESSAGE) ? 1 : 0;
        }
        /* else unknown option */
    } else if (scan_option_in_category(opt, "dev")) {
        if (scan_option_mentions(opt, "collect sha")) {
            result = (cctx->options->dev & CL_SCAN_DEV_COLLECT_SHA) ? 1 : 0;
        } else if (scan_option_mentions(opt, "collect performance info")) {
            result = (cctx->options->dev & CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO) ? 1 : 0;
        }
        /* else unknown option */
    }
    /* else unknown category */

done:
    free(option_name_l);
    return result;
}
uint32_t cli_bcapi_engine_db_options(struct cli_bc_ctx *ctx)
{
cli_ctx *cctx = (cli_ctx *)ctx->ctx;
2010-05-12 18:26:02 +03:00
return cctx->engine->dboptions;
}
int32_t cli_bcapi_extract_set_container(struct cli_bc_ctx *ctx, uint32_t ftype)
{
if (ftype > CL_TYPE_IGNORED)
return -1;
2010-05-12 18:26:02 +03:00
ctx->containertype = ftype;
2010-05-13 00:27:00 +03:00
return 0;
2010-05-12 18:26:02 +03:00
}
int32_t cli_bcapi_input_switch(struct cli_bc_ctx *ctx, int32_t extracted_file)
2010-05-12 18:26:02 +03:00
{
fmap_t *map;
if (0 == extracted_file) {
/*
* Set input back to original fmap.
*/
if (0 == ctx->extracted_file_input) {
/* Input already set to original fmap, nothing to do. */
return 0;
}
/* Free the fmap used for the extracted file */
funmap(ctx->fmap);
/* Restore pointer to original fmap */
cli_bytecode_context_setfile(ctx, ctx->save_map);
ctx->save_map = NULL;
ctx->extracted_file_input = 0;
cli_dbgmsg("bytecode api: input switched back to main file\n");
return 0;
} else {
/*
* Set input to extracted file.
*/
if (1 == ctx->extracted_file_input) {
/* Input already set to extracted file, nothing to do. */
return 0;
}
if (ctx->outfd < 0) {
/* no valid fd to switch to use for fmap */
return -1;
}
/* Create fmap for the extracted file */
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. 
This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
map = fmap(ctx->outfd, 0, 0, NULL);
if (!map) {
cli_warnmsg("can't mmap() extracted temporary file %s\n", ctx->tempfile);
return -1;
}
/* Save off pointer to original fmap */
ctx->save_map = ctx->fmap;
cli_bytecode_context_setfile(ctx, map);
ctx->extracted_file_input = 1;
cli_dbgmsg("bytecode api: input switched to extracted file\n");
return 0;
2010-05-12 18:26:02 +03:00
}
}
/**
 * Copy the first `len` bytes of ctx->env into the caller's buffer.
 *
 * @param ctx Bytecode context holding the environment.
 * @param env Destination buffer.
 * @param len Number of bytes to copy; must not exceed sizeof(*env).
 * @return 0 on success, -1 (as uint32_t) when `len` is too large.
 */
uint32_t cli_bcapi_get_environment(struct cli_bc_ctx *ctx, struct cli_environment *env, uint32_t len)
{
    if (len <= sizeof(*env)) {
        memcpy(env, ctx->env, len);
        return 0;
    }

    cli_dbgmsg("cli_bcapi_get_environment len %u > %lu\n", len, (unsigned long)sizeof(*env));
    return -1;
}
/**
 * Set ctx->bytecode_disable_status to 2 when `cond` is non-zero.
 *
 * Only permitted for bytecode of kind BC_STARTUP. A reason beginning with
 * '^' is logged as a warning (without the '^'); any other reason is logged
 * as a debug message.
 *
 * @param ctx    Bytecode context.
 * @param reason Text explaining why the bytecode is being disabled.
 * @param len    Unused.
 * @param cond   Disable only when non-zero.
 * @return The resulting bytecode_disable_status, or -1 (as uint32_t) when the
 *         bytecode is not BC_STARTUP.
 */
uint32_t cli_bcapi_disable_bytecode_if(struct cli_bc_ctx *ctx, const int8_t *reason, uint32_t len, uint32_t cond)
{
    UNUSEDPARAM(len);

    if (BC_STARTUP != ctx->bc->kind) {
        cli_dbgmsg("Bytecode must be BC_STARTUP to call disable_bytecode_if\n");
        return -1;
    }

    if (cond) {
        if ('^' == *reason)
            cli_warnmsg("Bytecode: disabling completely because %s\n", reason + 1);
        else
            cli_dbgmsg("Bytecode: disabling completely because %s\n", reason);
        ctx->bytecode_disable_status = 2;
    }

    return ctx->bytecode_disable_status;
}
/**
 * Set ctx->bytecode_disable_status to 1 when `cond` is non-zero, unless the
 * status is already 2 (a status of 2 is never lowered).
 *
 * Only permitted for bytecode of kind BC_STARTUP. A reason beginning with
 * '^' is logged as a warning (without the '^'); any other reason is logged
 * as a debug message.
 *
 * @param ctx    Bytecode context.
 * @param reason Text explaining why the JIT is being disabled.
 * @param len    Unused.
 * @param cond   Disable only when non-zero.
 * @return The resulting bytecode_disable_status, or -1 (as uint32_t) when the
 *         bytecode is not BC_STARTUP.
 */
uint32_t cli_bcapi_disable_jit_if(struct cli_bc_ctx *ctx, const int8_t *reason, uint32_t len, uint32_t cond)
{
    UNUSEDPARAM(len);

    if (BC_STARTUP != ctx->bc->kind) {
        cli_dbgmsg("Bytecode must be BC_STARTUP to call disable_jit_if\n");
        return -1;
    }

    if (cond) {
        if ('^' == *reason)
            cli_warnmsg("Bytecode: disabling JIT because %s\n", reason + 1);
        else
            cli_dbgmsg("Bytecode: disabling JIT because %s\n", reason);

        /* no reenabling */
        if (2 != ctx->bytecode_disable_status)
            ctx->bytecode_disable_status = 1;
    }

    return ctx->bytecode_disable_status;
}
/**
 * Compare two version strings component by component.
 *
 * Runs of identical non-digit bytes are skipped; each run of decimal digits
 * is converted to a number and the numbers are compared, so "1.10" sorts
 * after "1.9" and "1.05" equals "1.5".
 *
 * @param ctx     Unused.
 * @param lhs     Left-hand version bytes (need not be NUL-terminated).
 * @param lhs_len Number of bytes at lhs.
 * @param rhs     Right-hand version bytes (need not be NUL-terminated).
 * @param rhs_len Number of bytes at rhs.
 * @return -1 when lhs sorts before rhs, 1 when it sorts after, 0 when equal.
 */
int32_t cli_bcapi_version_compare(struct cli_bc_ctx *ctx, const uint8_t *lhs, uint32_t lhs_len,
                                  const uint8_t *rhs, uint32_t rhs_len)
{
    uint32_t i = 0, j = 0;

    UNUSEDPARAM(ctx);

    for (;;) {
        /* Each numeric component is parsed from scratch; carrying the previous
         * value over would weight components by their digit count. */
        unsigned long li = 0, ri = 0;

        /* Skip the common non-numeric prefix (separators, letters, ...). */
        while (i < lhs_len && j < rhs_len && lhs[i] == rhs[j] &&
               !isdigit(lhs[i])) {
            i++;
            j++;
        }

        if (i == lhs_len && j == rhs_len)
            return 0;
        if (i == lhs_len)
            return -1;
        if (j == rhs_len)
            return 1;

        /* At least one side is not at a number: order by raw byte value. */
        if (!isdigit(lhs[i]) || !isdigit(rhs[j]))
            return lhs[i] < rhs[j] ? -1 : 1;

        /* Bounds are checked before the byte is read. */
        while (i < lhs_len && isdigit(lhs[i]))
            li = 10 * li + (unsigned long)(lhs[i++] - '0');
        while (j < rhs_len && isdigit(rhs[j]))
            ri = 10 * ri + (unsigned long)(rhs[j++] - '0');

        if (li < ri)
            return -1;
        if (li > ri)
            return 1;
    }
}
/**
 * Compare one bit field of `query` against the same field of `value`.
 *
 * The field is selected by shifting right by `shift` and masking with
 * `mask`. A query field with every mask bit set acts as a wildcard.
 *
 * @return 1 when the fields are equal or the query field equals `mask`,
 *         0 otherwise.
 */
static int check_bits(uint32_t query, uint32_t value, uint8_t shift, uint8_t mask)
{
    const uint8_t wanted = (uint8_t)((query >> shift) & mask);
    const uint8_t actual = (uint8_t)((value >> shift) & mask);

    /* wanted == mask -> ANY */
    if (wanted == mask)
        return 1;

    return (wanted == actual) ? 1 : 0;
}
/**
 * Test the three query words against the platform identifiers stored in
 * ctx->env. Every bit field of a, b and c must pass check_bits() against the
 * matching field of platform_id_a, platform_id_b and platform_id_c.
 *
 * @param ctx Bytecode context holding the environment.
 * @param a   Query for platform_id_a.
 * @param b   Query for platform_id_b.
 * @param c   Query for platform_id_c.
 * @return 1 when all fields match (also logged as a debug message), else 0.
 */
uint32_t cli_bcapi_check_platform(struct cli_bc_ctx *ctx, uint32_t a, uint32_t b, uint32_t c)
{
    struct bit_field {
        uint8_t shift;
        uint8_t mask;
    };
    static const struct bit_field fields_a[] = {{24, 0xff}, {20, 0xf}, {16, 0xf}, {8, 0xff}, {0, 0xff}};
    static const struct bit_field fields_b[] = {{28, 0xf}, {24, 0xf}, {16, 0xff}, {8, 0xff}, {0, 0xff}};
    static const struct bit_field fields_c[] = {{24, 0xff}, {16, 0xff}, {8, 0xff}, {0, 0xff}};
    unsigned k;

    for (k = 0; k < sizeof(fields_a) / sizeof(fields_a[0]); k++) {
        if (!check_bits(a, ctx->env->platform_id_a, fields_a[k].shift, fields_a[k].mask))
            return 0;
    }
    for (k = 0; k < sizeof(fields_b) / sizeof(fields_b[0]); k++) {
        if (!check_bits(b, ctx->env->platform_id_b, fields_b[k].shift, fields_b[k].mask))
            return 0;
    }
    for (k = 0; k < sizeof(fields_c) / sizeof(fields_c[0]); k++) {
        if (!check_bits(c, ctx->env->platform_id_c, fields_c[k].shift, fields_c[k].mask))
            return 0;
    }

    cli_dbgmsg("check_platform(0x%08x,0x%08x,0x%08x) = match\n", a, b, c);
    return 1;
}
int cli_bytecode_context_setpdf(struct cli_bc_ctx *ctx, unsigned phase,
unsigned nobjs,
struct pdf_obj **objs, uint32_t *pdf_flags,
uint32_t pdfsize, uint32_t pdfstartoff)
2010-08-02 15:42:58 +03:00
{
ctx->pdf_nobjs = nobjs;
ctx->pdf_objs = objs;
ctx->pdf_flags = pdf_flags;
ctx->pdf_size = pdfsize;
2010-08-02 15:42:58 +03:00
ctx->pdf_startoff = pdfstartoff;
ctx->pdf_phase = phase;
2010-08-02 15:42:58 +03:00
return 0;
}
/**
 * Return the number of PDF objects recorded on the context.
 *
 * @return ctx->pdf_nobjs, or -1 when ctx->pdf_phase is zero.
 */
int32_t cli_bcapi_pdf_get_obj_num(struct cli_bc_ctx *ctx)
{
    if (ctx->pdf_phase)
        return ctx->pdf_nobjs;

    return -1;
}
/**
 * Read the PDF flags word referenced by the context.
 *
 * @return *ctx->pdf_flags, or -1 when ctx->pdf_phase is zero.
 */
int32_t cli_bcapi_pdf_get_flags(struct cli_bc_ctx *ctx)
{
    if (ctx->pdf_phase)
        return *ctx->pdf_flags;

    return -1;
}
int32_t cli_bcapi_pdf_set_flags(struct cli_bc_ctx *ctx, int32_t flags)
2010-08-02 15:42:58 +03:00
{
if (!ctx->pdf_phase)
return -1;
2010-08-02 15:42:58 +03:00
cli_dbgmsg("cli_pdf: bytecode set_flags %08x -> %08x\n",
*ctx->pdf_flags,
flags);
2010-08-02 15:42:58 +03:00
*ctx->pdf_flags = flags;
return 0;
}
int32_t cli_bcapi_pdf_lookupobj(struct cli_bc_ctx *ctx, uint32_t objid)
2010-08-02 15:42:58 +03:00
{
unsigned i;
if (!ctx->pdf_phase)
return -1;
for (i = 0; i < ctx->pdf_nobjs; i++) {
if (ctx->pdf_objs[i]->id == objid)
return i;
2010-08-02 15:42:58 +03:00
}
return -1;
}
uint32_t cli_bcapi_pdf_getobjsize(struct cli_bc_ctx *ctx, int32_t objidx)
2010-08-02 15:42:58 +03:00
{
if (!ctx->pdf_phase ||
(uint32_t)objidx >= ctx->pdf_nobjs ||
ctx->pdf_phase == PDF_PHASE_POSTDUMP /* map is obj itself, no access to pdf anymore */
)
return 0;
if ((uint32_t)(objidx + 1) == ctx->pdf_nobjs)
return ctx->pdf_size - ctx->pdf_objs[objidx]->start;
return ctx->pdf_objs[objidx + 1]->start - ctx->pdf_objs[objidx]->start - 4;
2010-08-02 15:42:58 +03:00
}
const uint8_t *cli_bcapi_pdf_getobj(struct cli_bc_ctx *ctx, int32_t objidx, uint32_t amount)
2010-08-02 15:42:58 +03:00
{
uint32_t size = cli_bcapi_pdf_getobjsize(ctx, objidx);
if (amount > size)
return NULL;
return fmap_need_off(ctx->fmap, ctx->pdf_objs[objidx]->start, amount);
2010-08-02 15:42:58 +03:00
}
int32_t cli_bcapi_pdf_getobjid(struct cli_bc_ctx *ctx, int32_t objidx)
2010-08-02 15:42:58 +03:00
{
if (!ctx->pdf_phase ||
(uint32_t)objidx >= ctx->pdf_nobjs)
return -1;
return ctx->pdf_objs[objidx]->id;
2010-08-02 15:42:58 +03:00
}
int32_t cli_bcapi_pdf_getobjflags(struct cli_bc_ctx *ctx, int32_t objidx)
2010-08-02 15:42:58 +03:00
{
if (!ctx->pdf_phase ||
(uint32_t)objidx >= ctx->pdf_nobjs)
return -1;
return ctx->pdf_objs[objidx]->flags;
2010-08-02 15:42:58 +03:00
}
int32_t cli_bcapi_pdf_setobjflags(struct cli_bc_ctx *ctx, int32_t objidx, int32_t flags)
2010-08-02 15:42:58 +03:00
{
if (!ctx->pdf_phase ||
(uint32_t)objidx >= ctx->pdf_nobjs)
return -1;
2010-08-02 15:42:58 +03:00
cli_dbgmsg("cli_pdf: bytecode setobjflags %08x -> %08x\n",
ctx->pdf_objs[objidx]->flags,
flags);
ctx->pdf_objs[objidx]->flags = flags;
2010-08-02 15:42:58 +03:00
return 0;
}
int32_t cli_bcapi_pdf_get_offset(struct cli_bc_ctx *ctx, int32_t objidx)
2010-08-02 15:42:58 +03:00
{
if (!ctx->pdf_phase ||
(uint32_t)objidx >= ctx->pdf_nobjs)
return -1;
return ctx->pdf_startoff + ctx->pdf_objs[objidx]->start;
2010-08-02 15:42:58 +03:00
}
/**
 * Return the PDF phase stored on the context.
 *
 * @return ctx->pdf_phase.
 */
int32_t cli_bcapi_pdf_get_phase(struct cli_bc_ctx *ctx)
{
    const int32_t phase = ctx->pdf_phase;

    return phase;
}
/**
 * Return the id of the dumped PDF object.
 *
 * @return ctx->pdf_dumpedid when the phase is PDF_PHASE_POSTDUMP, else -1.
 */
int32_t cli_bcapi_pdf_get_dumpedobjid(struct cli_bc_ctx *ctx)
{
    if (PDF_PHASE_POSTDUMP == ctx->pdf_phase)
        return ctx->pdf_dumpedid;

    return -1;
}
/**
 * Report the context's on_jit value.
 *
 * As a side effect ctx->no_diff is set to 1.
 *
 * @return ctx->on_jit.
 */
int32_t cli_bcapi_running_on_jit(struct cli_bc_ctx *ctx)
{
    int32_t on_jit;

    ctx->no_diff = 1;
    on_jit       = ctx->on_jit;

    return on_jit;
}
int32_t cli_bcapi_get_file_reliability(struct cli_bc_ctx *ctx)
2010-10-19 16:23:19 +03:00
{
cli_ctx *cctx = (cli_ctx *)ctx->ctx;
2010-10-19 16:23:19 +03:00
return cctx ? cctx->corrupted_input : 3;
}
/**
 * Tell whether JSON properties are available for the current scan.
 *
 * @return 1 when built with HAVE_JSON and cctx->properties is non-NULL,
 *         otherwise 0 (a debug message is logged when JSON support is not
 *         compiled in).
 */
int32_t cli_bcapi_json_is_active(struct cli_bc_ctx *ctx)
{
#if HAVE_JSON
    cli_ctx *cctx = (cli_ctx *)ctx->ctx;

    return (NULL != cctx->properties) ? 1 : 0;
#else
    UNUSEDPARAM(ctx);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return 0;
#endif
}
/**
 * Grow ctx->jsonobjs by one slot and store the scan context's properties
 * object in the new (last) slot.
 *
 * @return 0 on success; -1 when the reallocation fails or when JSON support
 *         is not compiled in.
 */
static int32_t cli_bcapi_json_objs_init(struct cli_bc_ctx *ctx)
{
#if HAVE_JSON
    cli_ctx *cctx            = (cli_ctx *)ctx->ctx;
    const unsigned new_count = ctx->njsonobjs + 1;
    json_object **grown;

    grown = cli_realloc((json_object **)(ctx->jsonobjs), sizeof(json_object *) * new_count);
    if (NULL == grown) { /* memory allocation failure */
        cli_event_error_oom(EV, 0);
        return -1;
    }

    grown[new_count - 1] = cctx->properties;
    ctx->jsonobjs        = (void **)grown;
    ctx->njsonobjs       = new_count;

    return 0;
#else
    UNUSEDPARAM(ctx);
    return -1;
#endif
}
/*
 * Guard for the JSON accessors: returns -1 from the *calling* function when
 * JSON is not active, and lazily creates the object table (via
 * cli_bcapi_json_objs_init()) the first time it is needed.
 *
 * Wrapped in do { } while (0) so that "INIT_JSON_OBJS(ctx);" behaves as a
 * single statement wherever it is placed.
 */
#define INIT_JSON_OBJS(ctx)                          \
    do {                                             \
        if (!cli_bcapi_json_is_active(ctx))          \
            return -1;                               \
        if ((ctx)->njsonobjs == 0) {                 \
            if (cli_bcapi_json_objs_init(ctx)) {     \
                return -1;                           \
            }                                        \
        }                                            \
    } while (0)
/**
 * Look up the member called `name` inside the JSON object identified by
 * `objid` and register it in the context's object table.
 *
 * @param ctx      Bytecode context.
 * @param name     Member name bytes (copied and NUL-terminated internally).
 * @param name_len Number of bytes to take from `name`.
 * @param objid    Id of the object to search in.
 * @return The id assigned to the member; 0 when the member does not exist;
 *         -1 on invalid arguments, allocation failure, or when JSON is not
 *         available.
 */
int32_t cli_bcapi_json_get_object(struct cli_bc_ctx *ctx, const int8_t *name, int32_t name_len, int32_t objid)
{
#if HAVE_JSON
    json_object **table, **grown;
    json_object *parent, *child;
    unsigned new_count;
    char *key;

    INIT_JSON_OBJS(ctx);
    table = ((json_object **)(ctx->jsonobjs));

    if (objid < 0 || (unsigned int)objid >= ctx->njsonobjs) {
        cli_dbgmsg("bytecode api[json_get_object]: invalid json objid requested\n");
        return -1;
    }
    if (!name || name_len < 0) {
        cli_dbgmsg("bytecode api[json_get_object]: unnamed object queried\n");
        return -1;
    }

    parent = table[objid];
    if (!parent) /* shouldn't be possible */
        return -1;

    /* json-c needs a NUL-terminated key, so take a private copy. */
    key = (char *)cli_malloc(sizeof(char) * (name_len + 1));
    if (!key)
        return -1;
    strncpy(key, (const char *)name, name_len);
    key[name_len] = '\0';

    if (!json_object_object_get_ex(parent, key, &child)) { /* object not found */
        free(key);
        return 0;
    }

    /* Append the member to the table; its index becomes its id. */
    new_count = ctx->njsonobjs + 1;
    grown     = cli_realloc(table, sizeof(json_object *) * new_count);
    if (!grown) { /* memory allocation failure */
        free(key);
        cli_event_error_oom(EV, 0);
        return -1;
    }
    grown[new_count - 1] = child;
    ctx->jsonobjs        = (void **)grown;
    ctx->njsonobjs       = new_count;

    cli_dbgmsg("bytecode api[json_get_object]: assigned %s => ID %d\n", key, new_count - 1);
    free(key);
    return new_count - 1;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(name);
    UNUSEDPARAM(name_len);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return -1;
#endif
}
/**
 * Map the json-c type of the object identified by `objid` onto the
 * bytecode API's JSON_TYPE_* constants.
 *
 * @return The JSON_TYPE_* value; -1 when `objid` is invalid, the json-c type
 *         is not recognised, or JSON is not available.
 */
int32_t cli_bcapi_json_get_type(struct cli_bc_ctx *ctx, int32_t objid)
{
#if HAVE_JSON
    json_object **table;
    enum json_type type;
    int32_t mapped = -1;

    INIT_JSON_OBJS(ctx);
    table = ((json_object **)(ctx->jsonobjs));

    if (objid < 0 || (unsigned int)objid >= ctx->njsonobjs) {
        cli_dbgmsg("bytecode api[json_get_type]: invalid json objid requested\n");
        return -1;
    }

    type = json_object_get_type(table[objid]);
    switch (type) {
        case json_type_null:
            mapped = JSON_TYPE_NULL;
            break;
        case json_type_boolean:
            mapped = JSON_TYPE_BOOLEAN;
            break;
        case json_type_double:
            mapped = JSON_TYPE_DOUBLE;
            break;
        case json_type_int:
            mapped = JSON_TYPE_INT;
            break;
        case json_type_object:
            mapped = JSON_TYPE_OBJECT;
            break;
        case json_type_array:
            mapped = JSON_TYPE_ARRAY;
            break;
        case json_type_string:
            mapped = JSON_TYPE_STRING;
            break;
        default:
            cli_dbgmsg("bytecode api[json_get_type]: unrecognized json type %d\n", type);
            break;
    }
    return mapped;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return -1;
#endif
}
/**
 * Return the number of elements of the JSON array identified by `objid`.
 *
 * @return The array length; -1 when `objid` is invalid or JSON is not
 *         available; -2 when the object is not an array.
 */
int32_t cli_bcapi_json_get_array_length(struct cli_bc_ctx *ctx, int32_t objid)
{
#if HAVE_JSON
    json_object **table;
    json_object *candidate;

    INIT_JSON_OBJS(ctx);
    table = (json_object **)(ctx->jsonobjs);

    if (objid < 0 || (unsigned int)objid >= ctx->njsonobjs) {
        cli_dbgmsg("bytecode api[json_array_get_length]: invalid json objid requested\n");
        return -1;
    }

    candidate = table[objid];
    if (json_type_array != json_object_get_type(candidate))
        return -2; /* error code for not an array */

    return json_object_array_length(candidate);
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return -1;
#endif
}
/**
 * Fetch element `idx` of the JSON array identified by `objid` and register
 * it in the context's object table.
 *
 * @param ctx   Bytecode context.
 * @param idx   Zero-based element index.
 * @param objid Id of the array object.
 * @return The id assigned to the element; 0 when `idx` is out of range or the
 *         element is missing; -1 on an invalid `objid`, allocation failure, or
 *         when JSON is not available; -2 when the object is not an array.
 */
int32_t cli_bcapi_json_get_array_idx(struct cli_bc_ctx *ctx, int32_t idx, int32_t objid)
{
#if HAVE_JSON
    json_object **table, **grown;
    json_object *array   = NULL;
    json_object *element = NULL;
    unsigned new_count;
    int length;

    INIT_JSON_OBJS(ctx);
    table = (json_object **)(ctx->jsonobjs);

    if (objid < 0 || (unsigned int)objid >= ctx->njsonobjs) {
        cli_dbgmsg("bytecode api[json_array_get_idx]: invalid json objid requested\n");
        return -1;
    }

    array = table[objid];
    if (!array) /* shouldn't be possible */
        return -1;

    if (json_type_array != json_object_get_type(array))
        return -2; /* error code for not an array */

    length = json_object_array_length(array);
    if (idx < 0 || idx >= length)
        return 0;

    element = json_object_array_get_idx(array, idx);
    if (!element) /* object not found */
        return 0;

    /* Append the element to the table; its index becomes its id. */
    new_count = ctx->njsonobjs + 1;
    grown     = cli_realloc(table, sizeof(json_object *) * new_count);
    if (!grown) { /* memory allocation failure */
        cli_event_error_oom(EV, 0);
        return -1;
    }
    grown[new_count - 1] = element;
    ctx->jsonobjs        = (void **)grown;
    ctx->njsonobjs       = new_count;

    cli_dbgmsg("bytecode api[json_array_get_idx]: assigned array @ %d => ID %d\n", idx, new_count - 1);
    return new_count - 1;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(idx);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return -1;
#endif
}
/**
 * Return the length (excluding the terminating NUL) of the JSON string
 * identified by `objid`.
 *
 * @return The string length; -1 when `objid` is invalid, the slot is empty,
 *         or JSON is not available; -2 when the object is not a string.
 */
int32_t cli_bcapi_json_get_string_length(struct cli_bc_ctx *ctx, int32_t objid)
{
#if HAVE_JSON
    json_object **table;
    json_object *candidate;
    const char *text;
    int32_t text_len;

    INIT_JSON_OBJS(ctx);
    table = (json_object **)(ctx->jsonobjs);

    if (objid < 0 || (unsigned int)objid >= ctx->njsonobjs) {
        cli_dbgmsg("bytecode api[json_get_string_length]: invalid json objid requested\n");
        return -1;
    }

    candidate = table[objid];
    if (!candidate) /* shouldn't be possible */
        return -1;

    if (json_type_string != json_object_get_type(candidate))
        return -2; /* error code for not a string */

    /* json_object_get_string_len() is not in JSON <0.10, so measure by hand. */
    text     = json_object_get_string(candidate);
    text_len = strlen(text);
    return text_len;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return -1;
#endif
}
/**
 * Copy the JSON string identified by `objid` into the caller's buffer,
 * truncating it when necessary. The output is always NUL-terminated.
 *
 * @param ctx     Bytecode context.
 * @param str     Destination buffer.
 * @param str_len Capacity of `str` in bytes, including room for the NUL.
 * @param objid   Id of the string object.
 * @return Number of bytes written including the NUL (`str_len` when the
 *         string was truncated); -1 when `objid` or the output buffer is
 *         invalid, the slot is empty, or JSON is not available; -2 when the
 *         object is not a string.
 */
int32_t cli_bcapi_json_get_string(struct cli_bc_ctx *ctx, int8_t *str, int32_t str_len, int32_t objid)
{
#if HAVE_JSON
    enum json_type type;
    json_object *jobj, **jobjs;
    int32_t len;
    const char *jstr;

    INIT_JSON_OBJS(ctx);
    jobjs = (json_object **)(ctx->jsonobjs);

    /* A buffer without room for at least the NUL cannot be filled; without
     * this check "str_len - 1" would underflow below. */
    if (!str || str_len <= 0) {
        cli_dbgmsg("bytecode api[json_get_string]: invalid output buffer\n");
        return -1;
    }
    if (objid < 0 || (unsigned int)objid >= ctx->njsonobjs) {
        cli_dbgmsg("bytecode api[json_get_string]: invalid json objid requested\n");
        return -1;
    }

    jobj = jobjs[objid];
    if (!jobj) /* shouldn't be possible */
        return -1;

    type = json_object_get_type(jobj);
    if (type != json_type_string) {
        return -2; /* error code for not a string */
    }

    /* json_object_get_string_len() is not in JSON <0.10, so measure by hand. */
    jstr = json_object_get_string(jobj);
    if (!jstr)
        return -1;
    len = strlen(jstr);

    if (len + 1 > str_len) {
        /* Truncate: fill the buffer and terminate it. */
        memcpy(str, jstr, (size_t)str_len - 1);
        str[str_len - 1] = '\0';
        return str_len;
    }

    /* Whole string fits, terminator included. */
    memcpy(str, jstr, (size_t)len);
    str[len] = '\0';
    return len + 1;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(str);
    UNUSEDPARAM(str_len);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return -1;
#endif
}
/**
 * Return json_object_get_boolean() of the JSON object identified by `objid`.
 *
 * @return The boolean value; -1 when `objid` is invalid or JSON is not
 *         active; 0 when JSON support is not compiled in.
 */
int32_t cli_bcapi_json_get_boolean(struct cli_bc_ctx *ctx, int32_t objid)
{
#if HAVE_JSON
    json_object **table;

    INIT_JSON_OBJS(ctx);
    table = (json_object **)(ctx->jsonobjs);

    if (objid >= 0 && (unsigned int)objid < ctx->njsonobjs)
        return json_object_get_boolean(table[objid]);

    cli_dbgmsg("bytecode api[json_get_boolean]: invalid json objid requested\n");
    return -1;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return 0;
#endif
}
/**
 * Return json_object_get_int() of the JSON object identified by `objid`.
 *
 * @return The integer value; -1 when `objid` is invalid or JSON is not
 *         active; 0 when JSON support is not compiled in.
 */
int32_t cli_bcapi_json_get_int(struct cli_bc_ctx *ctx, int32_t objid)
{
#if HAVE_JSON
    json_object **table;

    INIT_JSON_OBJS(ctx);
    table = (json_object **)(ctx->jsonobjs);

    if (objid >= 0 && (unsigned int)objid < ctx->njsonobjs)
        return json_object_get_int(table[objid]);

    cli_dbgmsg("bytecode api[json_get_int]: invalid json objid requested\n");
    return -1;
#else
    UNUSEDPARAM(ctx);
    UNUSEDPARAM(objid);
    cli_dbgmsg("bytecode api: libjson is not enabled!\n");
    return 0;
#endif
}
// int64_t cli_bcapi_json_get_int64(struct cli_bc_ctx *ctx, int32_t objid);
// double cli_bcapi_json_get_double(struct cli_bc_ctx *ctx, int32_t objid);