clamav/libclamav/vba_extract.c

2440 lines
84 KiB
C
Raw Normal View History

2004-01-23 11:17:16 +00:00
/*
2004-01-25 06:34:45 +00:00
* Extract VBA source code for component MS Office Documents
2004-01-23 11:17:16 +00:00
*
2023-02-07 19:35:18 -08:00
* Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
2013-03-12 13:51:49 -04:00
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Trog, Nigel Horne
*
2004-01-23 11:17:16 +00:00
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
2004-01-23 11:17:16 +00:00
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
2004-01-23 11:17:16 +00:00
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
2004-01-23 11:17:16 +00:00
#include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
2004-01-23 11:17:16 +00:00
#include <unistd.h>
#endif
2004-01-23 11:17:16 +00:00
#include <fcntl.h>
#include <stdlib.h>
#include <ctype.h>
Add CMake build tooling This patch adds experimental-quality CMake build tooling. The libmspack build required a modification to use "" instead of <> for header #includes. This will hopefully be included in the libmspack upstream project when adding CMake build tooling to libmspack. Removed use of libltdl when using CMake. Flex & Bison are now required to build. If -DMAINTAINER_MODE, then GPERF is also required, though it currently doesn't actually do anything. TODO! I found that the autotools build system was generating the lexer output but not actually compiling it, instead using previously generated (and manually renamed) lexer c source. As a consequence, changes to the .l and .y files weren't making it into the build. To resolve this, I removed generated flex/bison files and fixed the tooling to use the freshly generated files. Flex and bison are now required build tools. On Windows, this adds a dependency on the winflexbison package, which can be obtained using Chocolatey or may be manually installed. CMake tooling only has partial support for building with external LLVM library, and no support for the internal LLVM (to be removed in the future). I.e. The CMake build currently only supports the bytecode interpreter. Many files used include paths relative to the top source directory or relative to the current project, rather than relative to each build target. Modern CMake support requires including internal dependency headers the same way you would external dependency headers (albeit with "" instead of <>). This meant correcting all header includes to be relative to the build targets and not relative to the workspace. For example, ... ```c include "../libclamav/clamav.h" include "clamd/clamd_others.h" ``` ... becomes: ```c // libclamav include "clamav.h" // clamd include "clamd_others.h" ``` Fixes header name conflicts by renaming a few of the files. Converted the "shared" code into a static library, which depends on libclamav. 
The ironically named "shared" static library provides features common to the ClamAV apps which are not required in libclamav itself and are not intended for use by downstream projects. This change was required for correct modern CMake practices but was also required to use the automake "subdir-objects" option. This eliminates warnings when running autoreconf which, in the next version of autoconf & automake are likely to break the build. libclamav used to build in multiple stages where an earlier stage is a static library containing utils required by the "shared" code. Linking clamdscan and clamdtop with this libclamav utils static lib allowed these two apps to function without libclamav. While this is nice in theory, the practical gains are minimal and it complicates the build system. As such, the autotools and CMake tooling was simplified for improved maintainability and this feature was thrown out. clamdtop and clamdscan now require libclamav to function. Removed the nopthreads version of the autotools libclamav_internal_utils static library and added pthread linking to a couple apps that may have issues building on some platforms without it, with the intention of removing needless complexity from the source. Kept the regular version of libclamav_internal_utils.la though it is no longer used anywhere but in libclamav. Added an experimental doxygen build option which attempts to build clamav.h and libfreshclam doxygen html docs. The CMake build tooling also may build the example program(s), which isn't a feature in the Autotools build system. Changed C standard to C90+ due to inline linking issues with socket.h when linking libfreshclam.so on Linux. Generate common.rc for win32. Fix tabs/spaces in shared Makefile.am, and remove vestigial ifndef from misc.c. Add CMake files to the automake dist, so users can try the new CMake tooling w/out having to build from a git clone. 
clamonacc changes: - Renamed FANOTIFY macro to HAVE_SYS_FANOTIFY_H to better match other similar macros. - Added a new clamav-clamonacc.service systemd unit file, based on the work of ChadDevOps & Aaron Brighton. - Added missing clamonacc man page. Updates to clamdscan man page, add missing options. Remove vestigial CL_NOLIBCLAMAV definitions (all apps now use libclamav). Rename Windows mspack.dll to libmspack.dll so all ClamAV-built libraries have the lib-prefix with Visual Studio as with CMake.
2020-08-13 00:25:34 -07:00
#include <zlib.h>
Add CMake build tooling This patch adds experimental-quality CMake build tooling. The libmspack build required a modification to use "" instead of <> for header #includes. This will hopefully be included in the libmspack upstream project when adding CMake build tooling to libmspack. Removed use of libltdl when using CMake. Flex & Bison are now required to build. If -DMAINTAINER_MODE, then GPERF is also required, though it currently doesn't actually do anything. TODO! I found that the autotools build system was generating the lexer output but not actually compiling it, instead using previously generated (and manually renamed) lexer c source. As a consequence, changes to the .l and .y files weren't making it into the build. To resolve this, I removed generated flex/bison files and fixed the tooling to use the freshly generated files. Flex and bison are now required build tools. On Windows, this adds a dependency on the winflexbison package, which can be obtained using Chocolatey or may be manually installed. CMake tooling only has partial support for building with external LLVM library, and no support for the internal LLVM (to be removed in the future). I.e. The CMake build currently only supports the bytecode interpreter. Many files used include paths relative to the top source directory or relative to the current project, rather than relative to each build target. Modern CMake support requires including internal dependency headers the same way you would external dependency headers (albeit with "" instead of <>). This meant correcting all header includes to be relative to the build targets and not relative to the workspace. For example, ... ```c include "../libclamav/clamav.h" include "clamd/clamd_others.h" ``` ... becomes: ```c // libclamav include "clamav.h" // clamd include "clamd_others.h" ``` Fixes header name conflicts by renaming a few of the files. Converted the "shared" code into a static library, which depends on libclamav. 
The ironically named "shared" static library provides features common to the ClamAV apps which are not required in libclamav itself and are not intended for use by downstream projects. This change was required for correct modern CMake practices but was also required to use the automake "subdir-objects" option. This eliminates warnings when running autoreconf which, in the next version of autoconf & automake are likely to break the build. libclamav used to build in multiple stages where an earlier stage is a static library containing utils required by the "shared" code. Linking clamdscan and clamdtop with this libclamav utils static lib allowed these two apps to function without libclamav. While this is nice in theory, the practical gains are minimal and it complicates the build system. As such, the autotools and CMake tooling was simplified for improved maintainability and this feature was thrown out. clamdtop and clamdscan now require libclamav to function. Removed the nopthreads version of the autotools libclamav_internal_utils static library and added pthread linking to a couple apps that may have issues building on some platforms without it, with the intention of removing needless complexity from the source. Kept the regular version of libclamav_internal_utils.la though it is no longer used anywhere but in libclamav. Added an experimental doxygen build option which attempts to build clamav.h and libfreshclam doxygen html docs. The CMake build tooling also may build the example program(s), which isn't a feature in the Autotools build system. Changed C standard to C90+ due to inline linking issues with socket.h when linking libfreshclam.so on Linux. Generate common.rc for win32. Fix tabs/spaces in shared Makefile.am, and remove vestigial ifndef from misc.c. Add CMake files to the automake dist, so users can try the new CMake tooling w/out having to build from a git clone. 
clamonacc changes: - Renamed FANOTIFY macro to HAVE_SYS_FANOTIFY_H to better match other similar macros. - Added a new clamav-clamonacc.service systemd unit file, based on the work of ChadDevOps & Aaron Brighton. - Added missing clamonacc man page. Updates to clamdscan man page, add missing options. Remove vestigial CL_NOLIBCLAMAV definitions (all apps now use libclamav). Rename Windows mspack.dll to libmspack.dll so all ClamAV-built libraries have the lib-prefix with Visual Studio as with CMake.
2020-08-13 00:25:34 -07:00
#ifdef HAVE_JSON
#include <json.h>
#endif
#include "clamav.h"
2004-01-23 11:17:16 +00:00
2004-03-02 13:06:23 +00:00
#include "others.h"
#include "scanners.h"
#include "vba_extract.h"
#ifdef CL_DEBUG
#include "mbox.h"
#endif
#include "blob.h"
#include "ole2_extract.h"
#include "entconv.h"
2004-01-23 11:17:16 +00:00
/*
 * File-wide tuning constants.
 *
 * PPT_LZW_BUFFSIZE and VBA_COMPRESSION_WINDOW are not referenced in the part
 * of the file reviewed here; going by their names they size the PowerPoint
 * LZW buffer and the VBA decompression window -- NOTE(review): confirm at the
 * point of use.
 */
#define PPT_LZW_BUFFSIZE 8192
#define VBA_COMPRESSION_WINDOW 4096
/* Length in bytes of the "middle" marker compared by vba56_test_middle() */
#define MIDDLE_SIZE 20
#define MAX_VBA_COUNT 1000 /* If there's more than 1000 macros something's up! */

/*
 * When HAVE_ATTRIB_PACKED is not defined, make every __attribute__((...))
 * annotation expand to nothing.
 */
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
#endif
/*
* VBA (Visual Basic for Applications), versions 5 and 6
*/
2007-10-26 16:29:26 +00:00
/*
 * Fixed-size header found at the start of a VBA 5/6 project stream.
 * All members are raw byte arrays, so the structure is 34 bytes with no
 * padding and carries no byte-order assumptions of its own.
 */
struct vba56_header {
    unsigned char magic[2];   /* signature bytes */
    unsigned char version[4]; /* version identifier bytes */
    unsigned char ignore[28]; /* remaining header bytes, not interpreted */
};
/*
 * Describes one known VBA project version: a 32-bit signature value, a
 * human-readable version string and the byte order used by that version.
 */
typedef struct {
    uint32_t sig;    /* signature value identifying the version */
    const char *ver; /* descriptive version string */
    int big_endian;  /* e.g. MAC Office */
} vba_version_t;
/*
 * Forward declarations of file-local helpers. They are static, so their
 * definitions are expected elsewhere in this translation unit (not visible in
 * the part of the file reviewed here).
 */

/* NOTE(review): presumably advances fd past the next NUL byte -- confirm at the definition. */
static int skip_past_nul(int fd);
/*
 * Read a 16-bit value from fd into *u; big_endian selects the byte order.
 * Callers in this file treat a zero return as failure.
 */
static int read_uint16(int fd, uint16_t *u, int big_endian);
/* 32-bit counterpart of read_uint16() -- NOTE(review): return convention assumed to match; confirm. */
static int read_uint32(int fd, uint32_t *u, int big_endian);
/* NOTE(review): presumably lseek(fd, offset, whence) followed by reading len bytes into data -- confirm. */
static int seekandread(int fd, off_t offset, int whence, void *data, size_t len);
/* NOTE(review): presumably allocates a vba_project_t sized for record_count entries -- confirm. */
static vba_project_t *create_vba_project(int record_count, const char *dir, struct uniq *U);
2008-01-05 10:57:19 +00:00
/*
 * Convert a 16-bit value taken from the file into host byte order.
 * A non-zero big_endian selects the be16_to_host conversion, zero selects
 * le16_to_host.
 */
static uint16_t
vba_endian_convert_16(uint16_t value, int big_endian)
{
    return big_endian ? (uint16_t)be16_to_host(value) : le16_to_host(value);
}
/* Seems to be a duplicate of riff_endian_convert_32() */
2008-01-05 10:57:19 +00:00
/*
 * Convert a 32-bit value taken from the file into host byte order.
 * Zero big_endian selects the le32_to_host conversion, anything else selects
 * be32_to_host.
 */
static uint32_t
vba_endian_convert_32(uint32_t value, int big_endian)
{
    if (!big_endian) {
        return le32_to_host(value);
    }

    return be32_to_host(value);
}
/**
 * Build a printable, lower-cased ASCII rendering of a raw name buffer.
 *
 * When big_endian is zero only the bytes at even offsets are examined (step
 * of 2) and an odd trailing byte is dropped; otherwise every byte is
 * examined. For each examined byte:
 *   - a printable 7-bit character is emitted lower-cased;
 *   - a byte value 0..9 is emitted as "_<digit>_";
 *   - anything else is combined with the following byte into a 16-bit value
 *     and emitted as "_", three letters 'a'..'p' (low nibble first) for its
 *     low 12 bits, "aa", "_". If there is no following byte the conversion
 *     stops there.
 *
 * @param name        raw bytes to convert; must not be NULL or start with NUL
 * @param size        number of bytes in name; must be positive
 * @param big_endian  non-zero to examine every byte, zero to examine every
 *                    second byte
 * @return newly allocated NUL-terminated string that the caller must free(),
 *         or NULL on invalid arguments or allocation failure
 */
static char *
get_unicode_name(const char *name, int size, int big_endian)
{
    int i, increment;
    char *newname, *ret;

    if ((name == NULL) || (*name == '\0') || (size <= 0))
        return NULL;

    /*
     * One examined byte produces at most 7 output bytes ("_" + 5 + "_").
     * The size is computed in size_t so that a large size cannot overflow
     * signed int arithmetic.
     */
    newname = (char *)cli_malloc((size_t)size * 7 + 1);
    if (newname == NULL) {
        cli_errmsg("get_unicode_name: Unable to allocate memory for newname\n");
        return NULL;
    }

    if ((!big_endian) && (size & 0x1)) {
        cli_dbgmsg("get_unicode_name: odd number of bytes %d\n", size);
        --size;
    }

    increment = (big_endian) ? 1 : 2;
    ret       = newname;

    for (i = 0; i < size; i += increment) {
        /* The high-bit test guarantees a value in 0..127, so the casts to
         * unsigned char required by <ctype.h> do not change the value. */
        if ((!(name[i] & 0x80)) && isprint((unsigned char)name[i])) {
            *ret++ = (char)tolower((unsigned char)name[i]);
        } else {
            if ((name[i] < 10) && (name[i] >= 0)) {
                *ret++ = '_';
                *ret++ = (char)(name[i] + '0');
            } else {
                uint16_t x;

                /* Need a second byte to build the 16-bit value */
                if ((i + 1) >= size)
                    break;

                x = (uint16_t)((name[i] < 0 ? 0 : name[i] << 8) | name[i + 1]);

                *ret++ = '_';
                *ret++ = (char)('a' + ((x & 0xF)));
                *ret++ = (char)('a' + ((x >> 4) & 0xF));
                *ret++ = (char)('a' + ((x >> 8) & 0xF));
                *ret++ = 'a';
                *ret++ = 'a';
            }
            *ret++ = '_';
        }
    }

    *ret = '\0';

    /* Saves a lot of memory; on failure the original buffer is still valid */
    ret = cli_realloc(newname, (ret - newname) + 1);
    return ret ? ret : newname;
}
/**
 * Check whether the next MIDDLE_SIZE bytes of fd are one of the two known
 * "middle" marker sequences.
 *
 * If a marker is found the file position is left just past it. If the bytes
 * do not match, the position is moved back by MIDDLE_SIZE so they can be read
 * again. If fewer than MIDDLE_SIZE bytes could be read the function returns
 * without repositioning.
 *
 * @param fd  open file descriptor positioned at the candidate marker
 */
static void vba56_test_middle(int fd)
{
    char test_middle[MIDDLE_SIZE];

    /* MacOffice middle */
    static const uint8_t middle1_str[MIDDLE_SIZE] = {
        0x00, 0x01, 0x0d, 0x45, 0x2e, 0xe1, 0xe0, 0x8f, 0x10, 0x1a,
        0x85, 0x2e, 0x02, 0x60, 0x8c, 0x4d, 0x0b, 0xb4, 0x00, 0x00};

    /* MS Office middle */
    static const uint8_t middle2_str[MIDDLE_SIZE] = {
        0x00, 0x00, 0xe1, 0x2e, 0x45, 0x0d, 0x8f, 0xe0, 0x1a, 0x10,
        0x85, 0x2e, 0x02, 0x60, 0x8c, 0x4d, 0x0b, 0xb4, 0x00, 0x00};

    if (cli_readn(fd, test_middle, MIDDLE_SIZE) != MIDDLE_SIZE)
        return;

    if ((memcmp(test_middle, middle1_str, MIDDLE_SIZE) != 0) &&
        (memcmp(test_middle, middle2_str, MIDDLE_SIZE) != 0)) {
        cli_dbgmsg("middle not found\n");
        /* Not a marker: un-read the bytes */
        if (lseek(fd, -MIDDLE_SIZE, SEEK_CUR) == -1) {
            cli_dbgmsg("vba_test_middle: call to lseek() failed\n");
            return;
        }
    } else
        cli_dbgmsg("middle found\n");
}
2013-08-07 13:41:14 -04:00
/* return count of valid strings found, 0 on error */
2007-11-01 18:32:17 +00:00
static int
2007-12-21 11:17:25 +00:00
vba_read_project_strings(int fd, int big_endian)
{
2013-03-12 13:51:49 -04:00
unsigned char *buf = NULL;
uint16_t buflen = 0;
uint16_t length = 0;
2013-03-22 12:05:59 -04:00
int ret = 0, getnewlength = 1;
for (;;) {
2013-03-12 13:51:49 -04:00
off_t offset;
char *name;
2013-03-22 12:05:59 -04:00
/* if no initial name length, exit */
if (getnewlength && !read_uint16(fd, &length, big_endian)) {
2013-08-07 13:41:14 -04:00
ret = 0;
break;
}
2013-03-22 12:05:59 -04:00
getnewlength = 0;
/* if too short, break */
2013-03-12 13:51:49 -04:00
if (length < 6) {
if (lseek(fd, -2, SEEK_CUR) == -1) {
2013-02-28 21:01:40 -05:00
cli_dbgmsg("vba_read_project_strings: call to lseek() has failed\n");
ret = 0;
2013-02-28 21:01:40 -05:00
}
2013-03-12 13:51:49 -04:00
break;
}
/* ensure buffer is large enough */
if (length > buflen) {
2013-03-12 13:51:49 -04:00
unsigned char *newbuf = (unsigned char *)cli_realloc(buf, length);
if (newbuf == NULL) {
ret = 0;
break;
2013-03-12 13:51:49 -04:00
}
buflen = length;
buf = newbuf;
2013-03-12 13:51:49 -04:00
}
/* save current offset */
2013-03-12 13:51:49 -04:00
offset = lseek(fd, 0, SEEK_CUR);
2013-02-28 21:01:40 -05:00
if (offset == -1) {
cli_dbgmsg("vba_read_project_strings: call to lseek() has failed\n");
ret = 0;
break;
2013-02-28 21:01:40 -05:00
}
2007-10-26 16:29:26 +00:00
/* if read name failed, break */
if (cli_readn(fd, buf, (size_t)length) != (size_t)length) {
2013-03-12 13:51:49 -04:00
cli_dbgmsg("read name failed - rewinding\n");
if (lseek(fd, offset, SEEK_SET) == -1) {
2013-02-28 21:01:40 -05:00
cli_dbgmsg("call to lseek() in read name failed\n");
ret = 0;
2013-02-28 21:01:40 -05:00
}
2013-03-12 13:51:49 -04:00
break;
}
name = get_unicode_name((const char *)buf, length, big_endian);
cli_dbgmsg("length: %d, name: %s\n", length, (name) ? name : "[null]");
/* if invalid name, break */
if ((name == NULL) || (memcmp("*\\", name, 2) != 0) ||
(strchr("ghcd", name[2]) == NULL)) {
/* Not a valid string, rewind */
if (lseek(fd, -(length + 2), SEEK_CUR) == -1) {
2013-02-28 21:01:40 -05:00
cli_dbgmsg("call to lseek() after get_unicode_name has failed\n");
ret = 0;
2013-02-28 21:01:40 -05:00
}
2013-03-12 13:51:49 -04:00
free(name);
break;
}
free(name);
2007-11-01 18:32:17 +00:00
/* can't get length, break */
if (!read_uint16(fd, &length, big_endian)) {
2013-03-12 13:51:49 -04:00
break;
}
2007-11-01 18:32:17 +00:00
2013-03-12 13:51:49 -04:00
ret++;
/* continue on reasonable length value */
2013-03-12 13:51:49 -04:00
if ((length != 0) && (length != 65535)) {
continue;
}
/* determine offset and run middle test */
2013-03-12 13:51:49 -04:00
offset = lseek(fd, 10, SEEK_CUR);
2013-02-28 21:01:40 -05:00
if (offset == -1) {
cli_dbgmsg("call to lseek() has failed\n");
ret = 0;
break;
2013-02-28 21:01:40 -05:00
}
2013-03-12 13:51:49 -04:00
cli_dbgmsg("offset: %lu\n", (unsigned long)offset);
vba56_test_middle(fd);
2013-03-22 12:05:59 -04:00
getnewlength = 1;
2013-03-12 13:51:49 -04:00
}
2013-03-12 13:51:49 -04:00
free(buf);
return ret;
}
2020-07-24 08:32:47 -07:00
/**
 * Normalize VBA source text in place.
 *
 * Outside of double-quoted string literals:
 *   - ASCII upper-case letters are converted to lower case;
 *   - a run of spaces and tabs is reduced to a single space;
 *   - a line continuation (whitespace, '_', then "\n" or "\r\n") is removed,
 *     joining the two lines.
 * Inside string literals bytes are copied unchanged, except that each tab is
 * written as a space.
 *
 * The output never grows, so it is written over the start of the input.
 *
 * @param buffer  text to normalize; overwritten with the result
 * @param size    number of bytes in buffer
 * @return number of bytes of normalized text now at the start of buffer
 */
static size_t vba_normalize(unsigned char *buffer, size_t size)
{
    enum {
        NORMAL        = 0,
        IN_STRING     = 1,
        UNDERSCORE    = 2, /* saw whitespace followed by '_' */
        UNDERSCORE_CR = 3, /* saw whitespace, '_', '\r' */
        SPACE         = 5, /* inside a whitespace run */
    } state = NORMAL;
    size_t o = 0;
    size_t i;

    for (i = 0; i < size; ++i) {
        // TODO: Don't normalize stuff in comments
        // FIXME: Use UTF glyphs instead of raw bytes

        /*
         * "_\r" only forms a line continuation when '\n' follows right away.
         * For any other byte, abandon the pending continuation. Otherwise the
         * state would persist, later text would be copied without
         * normalization, and the next '\n' would delete two unrelated bytes.
         */
        if (state == UNDERSCORE_CR && buffer[i] != '\n') {
            state = NORMAL;
        }

        switch (buffer[i]) {
            case '"':
                if (state == IN_STRING) {
                    state = NORMAL;
                } else if (state == NORMAL || state == UNDERSCORE || state == SPACE) {
                    state = IN_STRING;
                }
                buffer[o++] = '"';
                break;
            case '_':
                /* Only an underscore preceded by whitespace can start a continuation */
                if (state == SPACE) {
                    state = UNDERSCORE;
                }
                buffer[o++] = '_';
                break;
            case '\r':
                if (state == UNDERSCORE) {
                    state = UNDERSCORE_CR;
                }
                buffer[o++] = '\r';
                break;
            case '\n':
                if (state == UNDERSCORE) {
                    /* drop the '_' already written */
                    o -= 1;
                    state = SPACE;
                } else if (state == UNDERSCORE_CR) {
                    /* drop the "_\r" already written */
                    o -= 2;
                    state = SPACE;
                } else {
                    buffer[o++] = '\n';
                }
                break;
            case '\t':
            case ' ':
                if (state != SPACE) {
                    buffer[o++] = ' ';
                }
                if (state == NORMAL || state == UNDERSCORE) {
                    state = SPACE;
                }
                break;
            default:
                if (state == NORMAL || state == UNDERSCORE || state == SPACE) {
                    if (buffer[i] >= 'A' && buffer[i] <= 'Z') {
                        buffer[o++] = (unsigned char)tolower((int)buffer[i]);
                    } else {
                        buffer[o++] = buffer[i];
                    }
                    state = NORMAL;
                } else {
                    buffer[o++] = buffer[i];
                }
                break;
        }
    }

    return o;
}
/**
* Read a VBA project in an OLE directory.
* Contrary to cli_vba_readdir, this function uses the dir file to locate VBA modules.
*/
cl_error_t cli_vba_readdir_new(cli_ctx *ctx, const char *dir, struct uniq *U, const char *hash, uint32_t which, int *tempfd, int *has_macros)
{
cl_error_t ret = CL_SUCCESS;
char fullname[1024];
2020-07-24 08:32:47 -07:00
int fd = -1;
unsigned char *data = NULL;
size_t data_len;
size_t data_offset;
const char *stream_name = NULL;
2020-07-24 08:32:47 -07:00
char *tempfile = NULL;
uint16_t codepage = CODEPAGE_ISO8859_1;
unsigned i;
char *mbcs_name = NULL, *utf16_name = NULL;
size_t mbcs_name_size = 0, utf16_name_size = 0;
unsigned char *module_data = NULL, *module_data_utf8 = NULL;
size_t module_data_size = 0, module_data_utf8_size = 0;
if (dir == NULL || hash == NULL || tempfd == NULL || has_macros == NULL) {
return CL_EARG;
}
cli_dbgmsg("vba_readdir_new: Scanning directory %s for VBA project\n", dir);
snprintf(fullname, sizeof(fullname), "%s" PATHSEP "%s_%u", dir, hash, which);
fullname[sizeof(fullname) - 1] = '\0';
fd = open(fullname, O_RDONLY | O_BINARY);
if (fd == -1) {
ret = CL_EOPEN;
goto done;
}
if ((data = cli_vba_inflate(fd, 0, &data_len)) == NULL) {
cli_dbgmsg("vba_readdir_new: Failed to decompress 'dir'\n");
ret = CL_EARG;
goto done;
}
*has_macros = *has_macros + 1;
if ((ret = cli_gentempfd_with_prefix(ctx->sub_tmpdir, "vba_project", &tempfile, tempfd)) != CL_SUCCESS) {
2020-07-24 08:32:47 -07:00
cli_warnmsg("vba_readdir_new: VBA project cannot be dumped to file\n");
goto done;
}
cli_dbgmsg("Dumping VBA project from dir %s to file %s\n", fullname, tempfile);
2020-07-24 08:32:47 -07:00
#define CLI_WRITEN(msg, size) \
do { \
if (cli_writen(*tempfd, msg, size) != size) { \
cli_warnmsg("vba_readdir_new: Failed to write to output file\n"); \
2020-07-24 08:32:47 -07:00
ret = CL_EWRITE; \
goto done; \
} \
} while (0)
2020-07-24 08:32:47 -07:00
#define CLI_WRITENHEX(msg, size) \
do { \
unsigned i; \
for (i = 0; i < size; ++i) { \
char buf[4]; \
if (snprintf(buf, sizeof(buf), "%02x", (msg)[i]) != 2) { \
cli_warnmsg("vba_readdir_new: Failed to write hex data to output file\n"); \
2020-07-24 08:32:47 -07:00
ret = CL_EWRITE; \
goto done; \
} \
CLI_WRITEN(buf, 2); \
} \
} while (0)
2020-07-24 08:32:47 -07:00
#define CLI_WRITEN_MBCS(msg, size) \
do { \
char *utf8 = NULL; \
size_t utf8_size; \
if (size > 0) { \
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, codepage, &utf8, &utf8_size)) { \
CLI_WRITEN(utf8, utf8_size); \
free(utf8); \
utf8 = NULL; \
} else { \
cli_dbgmsg("cli_vba_readdir_new: failed to convert codepage %" PRIu16 " to UTF-8\n", codepage); \
2020-07-24 08:32:47 -07:00
CLI_WRITEN("<error decoding string>", 23); \
} \
} \
} while (0)
2020-07-24 08:32:47 -07:00
#define CLI_WRITEN_UTF16LE(msg, size) \
do { \
char *utf8 = NULL; \
size_t utf8_size; \
if (size > 0) { \
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, CODEPAGE_UTF16_LE, &utf8, &utf8_size)) { \
CLI_WRITEN(utf8, utf8_size); \
free(utf8); \
utf8 = NULL; \
} else { \
cli_dbgmsg("cli_vba_readdir_new: failed to convert UTF16LE to UTF-8\n"); \
2020-07-24 08:32:47 -07:00
CLI_WRITEN("<error decoding string>", 23); \
} \
} \
} while (0)
CLI_WRITEN("REM VBA project extracted from Microsoft Office document\n\n", 58);
for (data_offset = 0; data_offset < data_len;) {
uint16_t id;
uint32_t size;
if (sizeof(uint16_t) > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: Failed to read record type from dir\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
id = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
if (sizeof(uint32_t) > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: Failed to read record size from dir\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: Record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
switch (id) {
// MS-OVBA 2.3.4.2.1.1 PROJECTSYSKIND
case 0x0001: {
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTSYSKIND record size (%" PRIu32 " != 4)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t sys_kind = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
CLI_WRITEN("REM PROJECTSYSKIND: ", 20);
switch (sys_kind) {
case 0x0:
CLI_WRITEN("Windows 16 bit", 14);
break;
case 0x1:
CLI_WRITEN("Windows 32 bit", 14);
break;
case 0x2:
CLI_WRITEN("Macintosh", 9);
break;
case 0x3:
CLI_WRITEN("Windows 64 bit", 14);
break;
default: {
char str_sys_kind[22];
2020-07-24 08:32:47 -07:00
int len = snprintf(str_sys_kind, sizeof(str_sys_kind), "Unknown (0x%x)", sys_kind);
str_sys_kind[sizeof(str_sys_kind) - 1] = '\0';
if (len > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(str_sys_kind, (size_t)len);
}
break;
}
}
CLI_WRITEN("\n", 1);
break;
}
// MS-OVBA 2.3.4.2.1.2 PROJECTLCID
case 0x0002: {
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTLCID record size (%" PRIu32 " != 4)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t lcid = le32_to_host(*(uint32_t *)&data[data_offset]);
char buf[64];
data_offset += size;
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTLCID: 0x%08x\n", lcid);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.1.3 PROJECTLCIDINVOKE
case 0x0014: {
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTLCIDINVOKE record size (%" PRIu32 " != 4)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t lcid_invoke = le32_to_host(*(uint32_t *)&data[data_offset]);
char buf[64];
data_offset += sizeof(uint32_t);
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTLCIDINVOKE: 0x%08x\n", lcid_invoke);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.1.4 PROJECTCODEPAGE
case 0x0003: {
if (size != sizeof(uint16_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTCODEPAGE record size (%" PRIu32 " != 2)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
codepage = le16_to_host(*(uint16_t *)&data[data_offset]);
char buf[64];
data_offset += sizeof(uint16_t);
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTCODEPAGE: 0x%04x\n", codepage);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.1.5 PROJECTNAME
case 0x0004: {
if (size < 1 || size > 128) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTNAME record size (1 <= %" PRIu32 " <= 128)\n", size);
ret = CL_EREAD;
goto done;
}
CLI_WRITEN("REM PROJECTNAME: ", 17);
CLI_WRITEN_MBCS(&data[data_offset], size);
data_offset += size;
CLI_WRITEN("\n", 1);
break;
}
// MS-OVBA 2.3.4.2.1.6 PROJECTDOCSTRING
case 0x0005: {
if (size > 2000) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTDOCSTRING record size (%" PRIu32 " <= 2000)\n", size);
ret = CL_EREAD;
goto done;
}
CLI_WRITEN("REM PROJECTDOCSTRING: ", 22);
CLI_WRITEN_MBCS(&data[data_offset], size);
data_offset += size;
CLI_WRITEN("\n", 1);
break;
}
// MS-OVBA 2.3.4.2.1.6 PROJECTDOCSTRING Unicode
case 0x0040: {
if (size % 2 != 0) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTDOCSTRINGUNICODE record size (%" PRIu32 " but should be even)\n", size);
ret = CL_EREAD;
goto done;
}
CLI_WRITEN("REM PROJECTDOCSTRINGUNICODE: ", 29);
CLI_WRITEN_UTF16LE(&data[data_offset], size);
data_offset += size;
CLI_WRITEN("\n", 1);
break;
}
// MS-OVBA 2.3.4.2.1.7 PROJECTHELPFILEPATH
case 0x0006: {
if (size > 260) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTHELPFILEPATH record size (%" PRIu32 " <= 260)\n", size);
ret = CL_EREAD;
goto done;
}
const size_t projecthelpfilepath_offset = data_offset;
CLI_WRITEN("REM PROJECTHELPFILEPATH: ", 25);
CLI_WRITEN_MBCS(&data[data_offset], size);
data_offset += size;
CLI_WRITEN("\n", 1);
if (sizeof(uint16_t) > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: Failed to read record type from dir\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
id = le16_to_host(*(uint16_t *)&data[data_offset]);
if (id != 0x003d) {
cli_warnmsg("vba_readdir_new: PROJECTHELPFILEPATH is not followed by PROJECTHELPFILEPATH2\n");
CLI_WRITEN("REM WARNING: PROJECTHELPFILEPATH is not followed by PROJECTHELPFILEPATH2\n", 73);
continue;
}
data_offset += sizeof(uint16_t);
if (sizeof(uint32_t) > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: Failed to read record size of PROJECTHELPFILEPATH2 record from dir\n");
ret = CL_EREAD;
goto done;
}
uint32_t size2;
2020-07-24 08:32:47 -07:00
size2 = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size2 > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: PROJECTHELPFILEPATH2 record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
if (size2 > 260) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTHELPFILEPATH2 record size (%" PRIu32 " <= 260)\n", size);
ret = CL_EREAD;
goto done;
}
if (size != size2) {
CLI_WRITEN("REM WARNING: PROJECTHELPFILEPATH and PROJECTHELPFILEPATH2 record sizes differ\n", 78);
2020-07-24 08:32:47 -07:00
} else {
if (memcmp(&data[projecthelpfilepath_offset], &data[data_offset], size) != 0) {
Bug fixes related to the recent HFS+/VBA/OLE2/XLM code changes This commit includes bug fixes and minor modifications based on warnings generated by Coverity. These include: - 287096 - In cli_xlm_extract_macros: Leak of memory or pointers to system resources (CWE-404). This was a legitimate leak of a generated temp filename and could occur frequently. - 287095 - In scan_for_xlm_macros: Use of an uninitialized variable. The uninitialized value (state.length) was likely never used unitialized, but we now initialize it just in case. - 287094 - In cli_vba_readdir_new: Out-of-bounds access to a buffer (CWE-119). This looks like a copy-paste error and was a legitimate read past the bounds of a buffer in an error case. - 284479 - In hfsplus_walk_catalog: All paths that lead to this null pointer comparison already dereference the pointer earlier (CWE-476). In certain cases a NULL pointer could be returned in the success case of hfsplus_scanfile, which was not handled correctly. This case may have been prevented in practice by an earlier check, but adding a check for NULL just in case. - 284478 - In hfsplus_walk_catalog: A value assigned to a variable is never used. ret would be set if zlib's inflateEnd function fails. The fix is to just not set ret in this case, since the error doesn't seem fatal (although would result in a memory leak by the zlib code...). - 284477 - In hfsplus_check_attribute: Pointer is checked against null but then dereferenced anyway. I just took out the NULL check of record and recordSize, since the code requires these values to not be NULL elsewhere and there's no way an error could occur as currently used (stack var addresses are passed via these parameters). I also fixed up some of the function identifiers in debug print messages.
2020-05-06 14:41:32 -04:00
CLI_WRITEN("REM WARNING: PROJECTHELPFILEPATH and PROJECTHELPFILEPATH2 contents differ\n", 74);
}
}
CLI_WRITEN("REM PROJECTHELPFILEPATH2: ", 26);
CLI_WRITEN_UTF16LE(&data[data_offset], size2);
data_offset += size2;
CLI_WRITEN("\n", 1);
break;
}
// MS-OVBA 2.3.4.2.1.8 PROJECTHELPCONTEXT
case 0x0007: {
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTHELPCONTEXT record size (%" PRIu32 " != 4)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t context = le32_to_host(*(uint32_t *)&data[data_offset]);
char buf[64];
data_offset += size;
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTHELPCONTEXT: 0x%04x\n", context);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.1.9 PROJECTLIBFLAGS
case 0x0008: {
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTLIBFLAGS record size (%" PRIu32 " != 4)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t libflags = le32_to_host(*(uint32_t *)&data[data_offset]);
char buf[64];
data_offset += sizeof(uint32_t);
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTLIBFLAGS: 0x%04x\n", libflags);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.1.10 PROJECTVERSION
case 0x0009: {
// The PROJECTVERSION record size is expected to be 4, even though the record size is 6.
if (size != 4) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTVERSION record size (%" PRIu32 " != 4)\n", size);
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t major = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += size;
if (sizeof(uint16_t) > data_len - data_offset) {
cli_warnmsg("vba_readdir_new: PROJECTVERSION record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint16_t minor = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
char buf[64];
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTVERSION: %u.%u\n", major, minor);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.3 PROJECTMODULES
case 0x000f: {
if (size != sizeof(uint16_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTMODULES record size\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint16_t modules = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
char buf[64];
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTMODULES: %u\n", modules);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.3.1 PROJECTCOOKIE
case 0x0013: {
if (size != sizeof(uint16_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected PROJECTCOOKIE record size\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint16_t cookie = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
char buf[64];
2020-07-24 08:32:47 -07:00
int buf_length = snprintf(buf, sizeof(buf), "REM PROJECTCOOKIE: 0x%04x\n", cookie);
buf[sizeof(buf) - 1] = '\0';
if (buf_length > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buf, (size_t)buf_length);
}
break;
}
// MS-OVBA 2.3.4.2.3.2 MODULE record
case 0x0019: {
// MS-OVBA 2.3.4.2.3.2.1 MODULENAME
CLI_WRITEN("\n\nREM MODULENAME: ", 18);
if (size > 0) {
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, codepage, &mbcs_name, &mbcs_name_size)) {
CLI_WRITEN(mbcs_name, mbcs_name_size);
2020-07-24 08:32:47 -07:00
} else {
cli_dbgmsg("cli_vba_readdir_new: failed to convert codepage %" PRIu16 " to UTF-8\n", codepage);
CLI_WRITEN("<error decoding string>", 23);
}
}
data_offset += size;
// MS-OVBA 2.3.4.2.3.2.2 MODULENAMEUNICODE
cli_dbgmsg("Reading MODULENAMEUNICODE record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULENAMEUNICODE record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x0047) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULENAMEUNICODE (0x47) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
CLI_WRITEN("\nREM MODULENAMEUNICODE: ", 24);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULENAMEUNICODE stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
if (size > 0) {
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, CODEPAGE_UTF16_LE, &utf16_name, &utf16_name_size)) {
CLI_WRITEN(utf16_name, utf16_name_size);
} else {
cli_dbgmsg("cli_vba_readdir_new: failed to convert UTF16LE to UTF-8\n");
CLI_WRITEN("<error decoding string>", 23);
}
}
data_offset += size;
if (mbcs_name && utf16_name &&
(mbcs_name_size != utf16_name_size ||
2020-07-24 08:32:47 -07:00
memcmp(mbcs_name, utf16_name, mbcs_name_size) != 0)) {
CLI_WRITEN("\nREM WARNING: MODULENAME and MODULENAMEUNICODE differ", 53);
}
if (mbcs_name) {
free(mbcs_name);
mbcs_name = NULL;
}
if (utf16_name) {
free(utf16_name);
utf16_name = NULL;
}
// MS-OVBA 2.3.4.2.3.2.3 MODULESTREAMNAME
cli_dbgmsg("Reading MODULESTREAMNAME record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULESTREAMNAME record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x001a) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULESTREAMNAME (0x1a) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
CLI_WRITEN("\nREM MODULESTREAMNAME: ", 23);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULESTREAMNAME stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
if (size > 0) {
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, codepage, &mbcs_name, &mbcs_name_size)) {
CLI_WRITEN(mbcs_name, mbcs_name_size);
2020-07-24 08:32:47 -07:00
} else {
cli_dbgmsg("cli_vba_readdir_new: failed to convert codepage %" PRIu16 " to UTF-8\n", codepage);
CLI_WRITEN("<error decoding string>", 23);
}
}
data_offset += size;
cli_dbgmsg("Reading MODULESTREAMNAMEUNICODE record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULESTREAMNAMEUNICODE record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x0032) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULESTREAMNAMEUNICODE (0x32) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
CLI_WRITEN("\nREM MODULESTREAMNAMEUNICODE: ", 30);
2020-07-24 08:32:47 -07:00
uint32_t module_stream_name_size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (module_stream_name_size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULESTREAMNAMEUNICODE stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
const unsigned char *module_stream_name = &data[data_offset];
if (module_stream_name_size > 0) {
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], module_stream_name_size, CODEPAGE_UTF16_LE, &utf16_name, &utf16_name_size)) {
CLI_WRITEN(utf16_name, utf16_name_size);
} else {
cli_dbgmsg("cli_vba_readdir_new: failed to convert UTF16LE to UTF-8\n");
CLI_WRITEN("<error decoding string>", 23);
}
}
data_offset += module_stream_name_size;
if (mbcs_name && utf16_name &&
(mbcs_name_size != utf16_name_size ||
2020-07-24 08:32:47 -07:00
memcmp(mbcs_name, utf16_name, mbcs_name_size) != 0)) {
CLI_WRITEN("\nREM WARNING: MODULESTREAMNAME and MODULESTREAMNAMEUNICODE differ", 65);
}
if (mbcs_name) {
free(mbcs_name);
mbcs_name = NULL;
}
if (utf16_name) {
free(utf16_name);
utf16_name = NULL;
}
// MS-OVBA 2.3.4.2.3.2.4 MODULEDOCSTRING
cli_dbgmsg("Reading MODULEDOCSTRING record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEDOCSTRING record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x001c) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEDOCSTRING (0x1c) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
CLI_WRITEN("\nREM MODULEDOCSTRING: ", 22);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEDOCSTRING stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
if (size > 0) {
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, codepage, &mbcs_name, &mbcs_name_size)) {
CLI_WRITEN(mbcs_name, mbcs_name_size);
2020-07-24 08:32:47 -07:00
} else {
cli_dbgmsg("cli_vba_readdir_new: failed to convert codepage %" PRIu16 " to UTF-8\n", codepage);
CLI_WRITEN("<error decoding string>", 23);
}
}
data_offset += size;
cli_dbgmsg("Reading MODULEDOCSTRINGUNICODE record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEDOCSTRINGUNICODE record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x0048) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEDOCSTRINGUNICODE (0x32) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
CLI_WRITEN("\nREM MODULEDOCSTRINGUNICODE: ", 29);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEDOCSTRINGUNICODE stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
if (size > 0) {
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)&data[data_offset], size, CODEPAGE_UTF16_LE, &utf16_name, &utf16_name_size)) {
CLI_WRITEN(utf16_name, utf16_name_size);
} else {
cli_dbgmsg("cli_vba_readdir_new: failed to convert UTF16LE to UTF-8\n");
CLI_WRITEN("<error decoding string>", 23);
}
}
data_offset += size;
if (mbcs_name && utf16_name &&
(mbcs_name_size != utf16_name_size ||
2020-07-24 08:32:47 -07:00
memcmp(mbcs_name, utf16_name, mbcs_name_size) != 0)) {
CLI_WRITEN("\nREM WARNING: MODULEDOCSTRING and MODULEDOCSTRINGUNICODE differ", 63);
}
if (mbcs_name) {
free(mbcs_name);
mbcs_name = NULL;
}
if (utf16_name) {
free(utf16_name);
utf16_name = NULL;
}
// MS-OVBA 2.3.4.2.3.2.5 MODULEOFFSET
cli_dbgmsg("Reading MODULEOFFSET record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEOFFSET record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x0031) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEOFFSET (0x31) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEOFFSET record size");
ret = CL_EREAD;
goto done;
}
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEOFFSET stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t module_offset = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += size;
char buffer[64];
int buffer_size = snprintf(buffer, sizeof(buffer), "\nREM MODULEOFFSET: 0x%08x", module_offset);
if (buffer_size > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buffer, (size_t)buffer_size);
}
// MS-OVBA 2.3.4.2.3.2.6 MODULEHELPCONTEXT
cli_dbgmsg("Reading MODULEHELPCONTEXT record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEHELPCONTEXT record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x001e) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEHELPCONTEXT (0x1e) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != sizeof(uint32_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEHELPCONTEXT record size");
ret = CL_EREAD;
goto done;
}
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEHELPCONTEXT stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
uint32_t help_context = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += size;
buffer_size = snprintf(buffer, sizeof(buffer), "\nREM MODULEHELPCONTEXT: 0x%08x", help_context);
if (buffer_size > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buffer, (size_t)buffer_size);
}
// MS-OVBA 2.3.4.2.3.2.7 MODULECOOKIE
cli_dbgmsg("Reading MODULECOOKIE record\n");
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULECOOKIE record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
if ((id = le16_to_host(*(uint16_t *)&data[data_offset])) != 0x002c) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULECOOKIE (0x2c) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != sizeof(uint16_t)) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULECOOKIE record size");
ret = CL_EREAD;
goto done;
}
if (size > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULECOOKIE record's cookie stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
uint16_t cookie = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += size;
buffer_size = snprintf(buffer, sizeof(buffer), "\nREM MODULECOOKIE: 0x%04x", cookie);
if (buffer_size > 0) {
2020-07-24 08:32:47 -07:00
CLI_WRITEN(buffer, (size_t)buffer_size);
}
// MS-OVBA 2.3.4.2.3.2.8 MODULETYPE
if (sizeof(uint16_t) + sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULETYPE record stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
id = le16_to_host(*(uint16_t *)&data[data_offset]);
if (id != 0x0021 && id != 0x0022) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULETYPE (0x21/0x22) record, but got 0x%04x\n", id);
ret = CL_EREAD;
goto done;
}
data_offset += sizeof(uint16_t);
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != 0) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULETYPE record size");
ret = CL_EREAD;
goto done;
}
if (id == 0x21) {
CLI_WRITEN("\nREM MODULETYPE: Procedural", 27);
2020-07-24 08:32:47 -07:00
} else {
CLI_WRITEN("\nREM MODULETYPE: Class", 22);
}
// MS-OVBA 2.3.4.2.3.2.9 MODULEREADONLY
if (sizeof(uint16_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEREADONLY record id field stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
id = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
if (id == 0x0025) {
if (sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEREADONLY record size field stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != 0) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEREADONLY record size");
ret = CL_EREAD;
goto done;
}
CLI_WRITEN("\nREM MODULEREADONLY", 19);
if (sizeof(uint16_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: record id field after MODULEREADONLY stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
id = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
}
// MS-OVBA 2.3.4.2.3.2.10 MODULEPRIVATE
if (id == 0x0028) {
if (sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULEPRIVATE record size field stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != 0) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULEPRIVATE record size");
ret = CL_EREAD;
goto done;
}
CLI_WRITEN("\nREM MODULEPRIVATE", 18);
if (sizeof(uint16_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: record id field after MODULEPRIVATE stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
2020-07-24 08:32:47 -07:00
id = le16_to_host(*(uint16_t *)&data[data_offset]);
data_offset += sizeof(uint16_t);
}
// Terminator
if (id != 0x002b) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULETERMINATOR ....");
ret = CL_EREAD;
goto done;
}
if (sizeof(uint32_t) > data_len - data_offset) {
cli_dbgmsg("vba_readdir_new: MODULETERMINATOR record size field stretches past the end of the file\n");
ret = CL_EREAD;
goto done;
}
size = le32_to_host(*(uint32_t *)&data[data_offset]);
data_offset += sizeof(uint32_t);
if (size != 0) {
cli_dbgmsg("cli_vba_readdir_new: Expected MODULETERMINATOR record size");
ret = CL_EREAD;
goto done;
}
CLI_WRITEN("\nREM ##################################################\n", 56);
2020-07-24 08:32:47 -07:00
stream_name = cli_ole2_get_property_name2((const char *)module_stream_name, (int)(module_stream_name_size + 2));
char *module_hash;
uint32_t module_hashcnt;
if (stream_name == NULL) {
ret = CL_EMEM;
goto done;
}
2020-07-24 08:32:47 -07:00
if (uniq_get(U, stream_name, (uint32_t)strlen(stream_name), &module_hash, &module_hashcnt) != CL_SUCCESS) {
cli_dbgmsg("cli_vba_readdir_new: Cannot find module stream %s\n", stream_name);
ret = CL_EOPEN;
goto done;
}
int module_stream_found = 0;
for (i = 1; i <= module_hashcnt; ++i) {
char module_filename[PATH_MAX];
snprintf(module_filename, sizeof(module_filename), "%s" PATHSEP "%s_%u", dir, module_hash, i);
module_filename[sizeof(module_filename) - 1] = '\0';
int module_fd = open(module_filename, O_RDONLY | O_BINARY);
if (module_fd == -1) {
continue;
}
module_data = cli_vba_inflate(module_fd, module_offset, &module_data_size);
if (!module_data) {
cli_dbgmsg("cli_vba_readdir_new: Failed to extract module data\n");
close(module_fd);
continue;
}
close(module_fd);
2020-07-24 08:32:47 -07:00
if (CL_SUCCESS == cli_codepage_to_utf8((char *)module_data, module_data_size, codepage, (char **)&module_data_utf8, &module_data_utf8_size)) {
module_data_utf8_size = vba_normalize(module_data_utf8, module_data_utf8_size);
CLI_WRITEN(module_data_utf8, module_data_utf8_size);
if (NULL != ctx->engine->cb_vba) {
ctx->engine->cb_vba(module_data_utf8, module_data_utf8_size, ctx->cb_ctx);
}
module_stream_found = 1;
free(module_data_utf8);
module_data_utf8 = NULL;
2020-07-24 08:32:47 -07:00
} else {
/*If normalization didn't work, fall back to the pre-normalized data.*/
if (NULL != ctx->engine->cb_vba) {
ctx->engine->cb_vba(module_data, module_data_size, ctx->cb_ctx);
}
CLI_WRITEN("\n<Error decoding module data>\n", 30);
cli_dbgmsg("cli_vba_readdir_new: Failed to decode VBA module content from codepage %" PRIu16 " to UTF8\n", codepage);
}
free(module_data);
module_data = NULL;
break;
}
if (!module_stream_found) {
cli_dbgmsg("cli_vba_readdir_new: Cannot find module stream %s\n", stream_name);
}
2020-07-24 08:32:47 -07:00
free((void *)stream_name);
stream_name = NULL;
break;
}
case 0x0010: { // Terminator
ret = CL_SUCCESS;
goto done;
}
default: {
data_offset += size;
}
}
}
#undef CLI_WRITEN
#undef CLI_WRITENHEX
#undef CLI_WRITEN_MBCS
#undef CLI_WRITEN_UTF16LE
done:
if (fd >= 0) {
close(fd);
}
if (data) {
2020-07-24 08:32:47 -07:00
free((void *)data);
}
if (stream_name) {
2020-07-24 08:32:47 -07:00
free((void *)stream_name);
}
if (tempfile) {
free(tempfile);
}
if (ret != CL_SUCCESS && *tempfd >= 0) {
close(*tempfd);
*tempfd = -1;
}
if (utf16_name) {
free(utf16_name);
utf16_name = NULL;
}
if (mbcs_name) {
free(mbcs_name);
mbcs_name = NULL;
}
2020-07-24 08:32:47 -07:00
if (module_data) {
free(module_data);
module_data = NULL;
}
2020-07-24 08:32:47 -07:00
if (module_data_utf8) {
free(module_data_utf8);
module_data_utf8 = NULL;
}
return ret;
}
2007-12-18 14:32:47 +00:00
vba_project_t *
cli_vba_readdir(const char *dir, struct uniq *U, uint32_t which)
2004-01-23 11:17:16 +00:00
{
unsigned char *buf;
const unsigned char vba56_signature[] = {0xcc, 0x61};
uint16_t record_count, buflen, ffff, byte_count;
uint32_t offset;
int i, j, fd, big_endian = FALSE;
vba_project_t *vba_project;
struct vba56_header v56h;
off_t seekback;
char fullname[1024], *hash;
uint32_t hashcnt = 0;
cli_dbgmsg("in cli_vba_readdir()\n");
if (dir == NULL)
return NULL;
/*
* _VBA_PROJECT files are embedded within office documents (OLE2)
*/
if (CL_SUCCESS != uniq_get(U, "_vba_project", 12, &hash, &hashcnt)) {
cli_dbgmsg("vba_readdir: uniq_get('_vba_project') failed. Unable to check # of embedded vba proj files\n");
return NULL;
}
if (hashcnt == 0) {
return NULL;
}
snprintf(fullname, sizeof(fullname), "%s" PATHSEP "%s_%u", dir, hash, which);
fullname[sizeof(fullname) - 1] = '\0';
fd = open(fullname, O_RDONLY | O_BINARY);
if (fd == -1)
return NULL;
if (cli_readn(fd, &v56h, sizeof(struct vba56_header)) != sizeof(struct vba56_header)) {
close(fd);
return NULL;
}
if (memcmp(v56h.magic, vba56_signature, sizeof(v56h.magic)) != 0) {
close(fd);
return NULL;
}
i = vba_read_project_strings(fd, TRUE);
if ((seekback = lseek(fd, 0, SEEK_CUR)) == -1) {
cli_dbgmsg("vba_readdir: lseek() failed. Unable to guess VBA type\n");
close(fd);
return NULL;
}
if (lseek(fd, sizeof(struct vba56_header), SEEK_SET) == -1) {
cli_dbgmsg("vba_readdir: lseek() failed. Unable to guess VBA type\n");
close(fd);
return NULL;
}
j = vba_read_project_strings(fd, FALSE);
if (!i && !j) {
close(fd);
cli_dbgmsg("vba_readdir: Unable to guess VBA type\n");
return NULL;
}
if (i > j) {
big_endian = TRUE;
if (lseek(fd, seekback, SEEK_SET) == -1) {
cli_dbgmsg("vba_readdir: call to lseek() while guessing big-endian has failed\n");
close(fd);
return NULL;
}
cli_dbgmsg("vba_readdir: Guessing big-endian\n");
} else {
cli_dbgmsg("vba_readdir: Guessing little-endian\n");
}
/* junk some more stuff */
do
if (cli_readn(fd, &ffff, 2) != 2) {
close(fd);
return NULL;
}
while (ffff != 0xFFFF);
/* check for alignment error */
if (!seekandread(fd, -3, SEEK_CUR, &ffff, sizeof(uint16_t))) {
close(fd);
return NULL;
}
if (ffff != 0xFFFF) {
if (lseek(fd, 1, SEEK_CUR) == -1) {
2013-02-28 21:01:40 -05:00
cli_dbgmsg("call to lseek() while checking alignment error has failed\n");
close(fd);
return NULL;
}
}
if (!read_uint16(fd, &ffff, big_endian)) {
close(fd);
return NULL;
}
2004-01-23 11:17:16 +00:00
if (ffff != 0xFFFF) {
if (lseek(fd, ffff, SEEK_CUR) == -1) {
2013-02-28 21:01:40 -05:00
cli_dbgmsg("call to lseek() while checking alignment error has failed\n");
close(fd);
return NULL;
}
}
if (!read_uint16(fd, &ffff, big_endian)) {
close(fd);
return NULL;
}
2004-01-23 11:17:16 +00:00
if (ffff == 0xFFFF)
ffff = 0;
if (lseek(fd, ffff + 100, SEEK_CUR) == -1) {
2013-02-28 21:01:40 -05:00
cli_dbgmsg("call to lseek() failed\n");
close(fd);
return NULL;
}
if (!read_uint16(fd, &record_count, big_endian)) {
close(fd);
return NULL;
}
cli_dbgmsg("vba_readdir: VBA Record count %d\n", record_count);
if (record_count == 0) {
/* No macros, assume clean */
close(fd);
return NULL;
}
if (record_count > MAX_VBA_COUNT) {
/* Almost certainly an error */
cli_dbgmsg("vba_readdir: VBA Record count too big\n");
close(fd);
return NULL;
}
vba_project = create_vba_project(record_count, dir, U);
if (vba_project == NULL) {
close(fd);
return NULL;
}
buf = NULL;
buflen = 0;
for (i = 0; i < record_count; i++) {
uint16_t length;
char *ptr;
vba_project->colls[i] = 0;
if (!read_uint16(fd, &length, big_endian))
break;
if (length == 0) {
cli_dbgmsg("vba_readdir: zero name length\n");
break;
}
if (length > buflen) {
unsigned char *newbuf = (unsigned char *)cli_realloc(buf, length);
if (newbuf == NULL)
break;
buflen = length;
buf = newbuf;
}
if (cli_readn(fd, buf, (size_t)length) != (size_t)length) {
cli_dbgmsg("vba_readdir: read name failed\n");
break;
}
ptr = get_unicode_name((const char *)buf, length, big_endian);
if (ptr == NULL) break;
if (CL_SUCCESS != uniq_get(U, ptr, strlen(ptr), &hash, &hashcnt)) {
cli_dbgmsg("vba_readdir: uniq_get('%s') failed.\n", ptr);
free(ptr);
break;
}
vba_project->colls[i] = hashcnt;
if (0 == vba_project->colls[i]) {
cli_dbgmsg("vba_readdir: cannot find project %s (%s)\n", ptr, hash);
free(ptr);
break;
}
cli_dbgmsg("vba_readdir: project name: %s (%s)\n", ptr, hash);
free(ptr);
vba_project->name[i] = hash;
if (!read_uint16(fd, &length, big_endian))
break;
lseek(fd, length, SEEK_CUR);
if (!read_uint16(fd, &ffff, big_endian))
break;
if (ffff == 0xFFFF) {
lseek(fd, 2, SEEK_CUR);
if (!read_uint16(fd, &ffff, big_endian))
break;
lseek(fd, ffff + 8, SEEK_CUR);
} else
lseek(fd, ffff + 10, SEEK_CUR);
if (!read_uint16(fd, &byte_count, big_endian))
break;
lseek(fd, (8 * byte_count) + 5, SEEK_CUR);
if (!read_uint32(fd, &offset, big_endian))
break;
cli_dbgmsg("vba_readdir: offset: %u\n", (unsigned int)offset);
vba_project->offset[i] = offset;
lseek(fd, 2, SEEK_CUR);
}
if (buf)
free(buf);
close(fd);
if (i < record_count) {
free(vba_project->name);
free(vba_project->colls);
free(vba_project->dir);
free(vba_project->offset);
free(vba_project);
return NULL;
}
return vba_project;
2004-01-23 11:17:16 +00:00
}
2007-12-17 20:22:11 +00:00
unsigned char *
cli_vba_inflate(int fd, off_t offset, size_t *size)
2004-01-23 11:17:16 +00:00
{
unsigned int pos, shift, mask, distance, clean;
uint8_t flag;
uint16_t token;
blob *b;
unsigned char buffer[VBA_COMPRESSION_WINDOW];
if (fd < 0)
return NULL;
b = blobCreate();
if (b == NULL)
return NULL;
memset(buffer, 0, sizeof(buffer));
lseek(fd, offset + 3, SEEK_SET); /* 1byte ?? , 2byte length ?? */
clean = TRUE;
pos = 0;
while (cli_readn(fd, &flag, 1) == 1) {
for (mask = 1; mask < 0x100; mask <<= 1) {
unsigned int winpos = pos % VBA_COMPRESSION_WINDOW;
if (flag & mask) {
uint16_t len;
unsigned int srcpos;
if (!read_uint16(fd, &token, FALSE)) {
blobDestroy(b);
if (size)
*size = 0;
return NULL;
}
shift = 12 - (winpos > 0x10) - (winpos > 0x20) - (winpos > 0x40) - (winpos > 0x80) - (winpos > 0x100) - (winpos > 0x200) - (winpos > 0x400) - (winpos > 0x800);
len = (uint16_t)((token & ((1 << shift) - 1)) + 3);
distance = token >> shift;
srcpos = pos - distance - 1;
if ((((srcpos + len) % VBA_COMPRESSION_WINDOW) < winpos) &&
((winpos + len) < VBA_COMPRESSION_WINDOW) &&
(((srcpos % VBA_COMPRESSION_WINDOW) + len) < VBA_COMPRESSION_WINDOW) &&
(len <= VBA_COMPRESSION_WINDOW)) {
srcpos %= VBA_COMPRESSION_WINDOW;
memcpy(&buffer[winpos], &buffer[srcpos],
len);
pos += len;
} else
while (len-- > 0) {
srcpos = (pos - distance - 1) % VBA_COMPRESSION_WINDOW;
buffer[pos++ % VBA_COMPRESSION_WINDOW] = buffer[srcpos];
}
} else {
if ((pos != 0) && (winpos == 0) && clean) {
if (cli_readn(fd, &token, 2) != 2) {
blobDestroy(b);
if (size)
*size = 0;
return NULL;
}
(void)blobAddData(b, buffer, VBA_COMPRESSION_WINDOW);
clean = FALSE;
break;
}
if (cli_readn(fd, &buffer[winpos], 1) == 1)
pos++;
}
clean = TRUE;
}
}
if (blobAddData(b, buffer, pos % VBA_COMPRESSION_WINDOW) < 0) {
blobDestroy(b);
if (size)
*size = 0;
return NULL;
}
if (size)
*size = blobGetDataSize(b);
return (unsigned char *)blobToMem(b);
2004-01-23 11:17:16 +00:00
}
/*
 * Copy up to `len` bytes from descriptor `s` to descriptor `d`, one
 * FILEBUFF-sized chunk at a time. Copying stops silently at the first short
 * read or short write; nothing is reported to the caller.
 *
 * See also cli_filecopy()
 */
static void
ole_copy_file_data(int s, int d, uint32_t len)
{
    unsigned char data[FILEBUFF];

    while (len > 0) {
        /* todo is capped at len, so the subtraction below cannot wrap. */
        size_t todo = MIN(sizeof(data), len);

        if (cli_readn(s, data, todo) != todo)
            break;
        if (cli_writen(d, data, todo) != todo)
            break;

        len -= todo;
    }
}
/**
 * Copy the payload of an OLE 1.0 object stream into a temporary file and
 * scan that file.
 *
 * The stream starts with a 32-bit size. When the file is at least 4 bytes
 * longer than that size, a header (type id, attachment name, two full paths
 * and 8 unexplained bytes) is skipped and the payload size is read again.
 *
 * @param fd   Descriptor of the extracted stream. Negative yields CL_CLEAN.
 * @param ctx  Scan context. It supplies the temp directory and the keeptmp
 *             setting, and is passed on to cli_magic_scan_desc().
 * @return CL_CLEAN when the header cannot be read, CL_ESTAT / CL_EMEM /
 *         CL_ECREAT on the corresponding failure, otherwise the result of
 *         cli_magic_scan_desc().
 */
int cli_scan_ole10(int fd, cli_ctx *ctx)
{
    STATBUF sb;
    uint32_t payload_len;
    char *tmpname;
    int out;
    cl_error_t status;

    if (fd < 0)
        return CL_CLEAN;

    lseek(fd, 0, SEEK_SET);
    if (!read_uint32(fd, &payload_len, FALSE))
        return CL_CLEAN;

    if (FSTAT(fd, &sb) == -1)
        return CL_ESTAT;

    if ((sb.st_size - payload_len) >= 4) {
        /* The checks run strictly in order; the first failure ends the scan. */
        if (lseek(fd, 2, SEEK_CUR) == -1 || /* Probably the OLE type id */
            !skip_past_nul(fd) ||           /* Attachment name */
            !skip_past_nul(fd) ||           /* Attachment full path */
            lseek(fd, 8, SEEK_CUR) == -1 || /* ??? */
            !skip_past_nul(fd) ||           /* Attachment full path */
            !read_uint32(fd, &payload_len, FALSE)) {
            return CL_CLEAN;
        }
    }

    tmpname = cli_gentemp(ctx ? ctx->sub_tmpdir : NULL);
    if (!tmpname) {
        return CL_EMEM;
    }

    out = open(tmpname, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_EXCL,
               S_IWUSR | S_IRUSR);
    if (out < 0) {
        cli_warnmsg("cli_decode_ole_object: can't create %s\n", tmpname);
        free(tmpname);
        return CL_ECREAT;
    }

    cli_dbgmsg("cli_decode_ole_object: decoding to %s\n", tmpname);
    ole_copy_file_data(fd, out, payload_len);
    lseek(out, 0, SEEK_SET);

    status = cli_magic_scan_desc(out, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE);
    close(out);

    /* The temp file is removed unless the engine is set to keep temp files. */
    if (ctx && !ctx->engine->keeptmp && cli_unlink(tmpname)) {
        cli_dbgmsg("cli_decode_ole_object: Failed to remove temp file: %s\n", tmpname);
    }

    free(tmpname);
    return status;
}
/*
* Powerpoint files
*/
/*
 * Decoded header of one PowerPoint record ("atom"), as filled in by
 * ppt_read_atom_header().
 */
typedef struct {
/* Type field of the on-disk header after vba_endian_convert_16(). */
uint16_t type;
/*
 * Length field of the on-disk header after vba_endian_convert_32().
 * ppt_stream_iter() stops when it sees a length of 0.
 */
uint32_t length;
} atom_header_t;
/*
 * Read one 8-byte PowerPoint record header from `fd` and store its type and
 * length in `atom_header`. The first 16-bit field packs a version (low 4
 * bits) and an instance (remaining bits); both are only logged.
 *
 * Returns TRUE on success, FALSE when the header cannot be read in full.
 */
static int
ppt_read_atom_header(int fd, atom_header_t *atom_header)
{
    struct ppt_header {
        uint16_t ver;
        uint16_t type;
        uint32_t length;
    } raw;
    uint16_t ver_inst;

    cli_dbgmsg("in ppt_read_atom_header\n");

    if (cli_readn(fd, &raw, sizeof(raw)) != sizeof(raw)) {
        cli_dbgmsg("read ppt_header failed\n");
        return FALSE;
    }

    /* Convert all three fields first, then log them in header order. */
    ver_inst            = vba_endian_convert_16(raw.ver, FALSE);
    atom_header->type   = vba_endian_convert_16(raw.type, FALSE);
    atom_header->length = vba_endian_convert_32(raw.length, FALSE);

    cli_dbgmsg("\tversion: 0x%.2x\n", ver_inst & 0xF);
    cli_dbgmsg("\tinstance: 0x%.2x\n", ver_inst >> 4);
    cli_dbgmsg("\ttype: 0x%.4x\n", atom_header->type);
    cli_dbgmsg("\tlength: 0x%.8x\n", (int)atom_header->length);

    return TRUE;
}
/*
2007-12-21 11:17:25 +00:00
* TODO: combine shared code with flatedecode() or cli_unzip_single()
* Needs cli_unzip_single to have a "length" argument
*/
2007-12-21 11:17:25 +00:00
static int
ppt_unlzw(const char *dir, int fd, uint32_t length)
{
int ofd;
z_stream stream;
unsigned char inbuff[PPT_LZW_BUFFSIZE], outbuff[PPT_LZW_BUFFSIZE];
Improve tmp sub-directory names At present many parsers create tmp subdirectories to store extracted files. For parsers like the vba parser, this is required as the directory is later scanned. For other parsers, these subdirectories are probably not helpful now that we provide recursive sub-dirs when --leave-temps is enabled. It's not quite as simple as removing the extra subdirectories, however. Certain parsers, like autoit, don't create very unique filenames and would result in file name collisions when --leave-temps is not enabled. The best thing to do would be to make sure each parser uses unique filenames and doesn't rely on cli_magic_scan_dir() to scan extracted content before removing the extra subdirectory. In the meantime, this commit gives the extra subdirectories meaningful names to improve readability. This commit also: - Provides the 'bmp' prefix for extracted PE icons. - Removes empty tmp subdirs when extracting rtf files, to eliminate clutter. - The PDF parser sometimes creates tmp files when decompressing streams before it knows if there is actually any content to decompress. This resulted in a large number of empty files. While it would be best to avoid creating empty files in the first place, that's not quite as as it sounds. This commit does the next best thing and deletes the tmp files if nothing was actually extracted, even if --leave-temps is enabled. - Removes the "scantemp" prefix for unnamed fmaps scanned with cli_magic_scan(). The 5-character hashes given to tmp files with prefixes resulted in occasional file name collisions when extracting certain file types with thousands of embedded files. - The VBA and TAR parsers mistakenly used NAME_MAX instead of PATH_MAX, resulting in truncated file paths and failed extraction when --leave-temps is enabled and a lot of recursion is in play. This commit switches them from NAME_MAX to PATH_MAX.
2020-03-27 16:06:22 -04:00
char fullname[PATH_MAX + 1];
snprintf(fullname, sizeof(fullname) - 1, "%s" PATHSEP "ppt%.8lx.doc",
dir, (long)lseek(fd, 0L, SEEK_CUR));
ofd = open(fullname, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_EXCL,
S_IWUSR | S_IRUSR);
if (ofd == -1) {
cli_warnmsg("ppt_unlzw: can't create %s\n", fullname);
return FALSE;
}
stream.zalloc = Z_NULL;
stream.zfree = Z_NULL;
stream.opaque = (void *)NULL;
stream.next_in = (Bytef *)inbuff;
stream.next_out = outbuff;
stream.avail_out = sizeof(outbuff);
stream.avail_in = MIN(length, PPT_LZW_BUFFSIZE);
if (cli_readn(fd, inbuff, (size_t)stream.avail_in) != (size_t)stream.avail_in) {
close(ofd);
cli_unlink(fullname);
return FALSE;
}
length -= stream.avail_in;
if (inflateInit(&stream) != Z_OK) {
close(ofd);
cli_unlink(fullname);
cli_warnmsg("ppt_unlzw: inflateInit failed\n");
return FALSE;
}
do {
if (stream.avail_out == 0) {
if (cli_writen(ofd, outbuff, PPT_LZW_BUFFSIZE) != PPT_LZW_BUFFSIZE) {
close(ofd);
inflateEnd(&stream);
return FALSE;
}
stream.next_out = outbuff;
stream.avail_out = PPT_LZW_BUFFSIZE;
}
if (stream.avail_in == 0) {
stream.next_in = inbuff;
stream.avail_in = MIN(length, PPT_LZW_BUFFSIZE);
if (cli_readn(fd, inbuff, (size_t)stream.avail_in) != (size_t)stream.avail_in) {
close(ofd);
inflateEnd(&stream);
return FALSE;
}
length -= stream.avail_in;
}
} while (inflate(&stream, Z_NO_FLUSH) == Z_OK);
if (cli_writen(ofd, outbuff, PPT_LZW_BUFFSIZE - stream.avail_out) != (size_t)(PPT_LZW_BUFFSIZE - stream.avail_out)) {
close(ofd);
inflateEnd(&stream);
return FALSE;
}
close(ofd);
return inflateEnd(&stream) == Z_OK;
}
/**
 * @brief Walk the atoms of a PowerPoint stream and extract every atom of
 *        type 0x1011 into dir with ppt_unlzw().
 *
 * Atoms of any other type are skipped by seeking past their payload. The
 * walk ends when ppt_read_atom_header() stops returning headers.
 *
 * @param fd   Descriptor the atom headers are read from.
 * @param dir  Directory that receives the extracted files.
 * @return     dir when the walk finished; NULL when an atom has length 0,
 *             a 0x1011 atom is too short to hold its ID, or seeking past the
 *             ID / extracting the payload fails. A failed skip of any other
 *             atom type only ends the walk and still returns dir.
 */
static const char *
ppt_stream_iter(int fd, const char *dir)
{
    atom_header_t atom_header;

    while (ppt_read_atom_header(fd, &atom_header)) {
        if (atom_header.length == 0)
            return NULL;

        if (atom_header.type == 0x1011) {
            uint32_t length;

            /*
             * The payload starts with a 32-bit ID. A shorter atom is
             * malformed and would make the subtraction below wrap around
             * to a huge length.
             */
            if (atom_header.length < sizeof(uint32_t)) {
                cli_dbgmsg("ppt_stream_iter: atom too short\n");
                return NULL;
            }

            /* Skip over ID */
            if (lseek(fd, sizeof(uint32_t), SEEK_CUR) == -1) {
                cli_dbgmsg("ppt_stream_iter: seek failed\n");
                return NULL;
            }

            length = atom_header.length - (uint32_t)sizeof(uint32_t);
            cli_dbgmsg("length: %d\n", (int)length);
            if (!ppt_unlzw(dir, fd, length)) {
                cli_dbgmsg("ppt_unlzw failed\n");
                return NULL;
            }
        } else {
            off_t offset = lseek(fd, 0, SEEK_CUR);

            /* Without a valid current position the target cannot be computed */
            if (offset == (off_t)-1) {
                break;
            }
            /* Check we don't wrap */
            if ((offset + (off_t)atom_header.length) < offset) {
                break;
            }
            offset += atom_header.length;
            if (lseek(fd, offset, SEEK_SET) != offset) {
                break;
            }
        }
    }
    return dir;
}
/**
 * @brief Extract the embedded objects of a PowerPoint stream into a freshly
 *        created temporary directory.
 *
 * @param ifd  Descriptor of the stream; handed to ppt_stream_iter().
 * @param ctx  Scan context. When non-NULL its sub_tmpdir is passed to
 *             cli_gentemp_with_prefix(), otherwise NULL is passed.
 * @return     Name of the directory (the caller owns the string), or NULL if
 *             the name could not be generated, the directory could not be
 *             created, or ppt_stream_iter() failed. In the last case the
 *             directory is removed again with cli_rmdirs().
 */
char *
cli_ppt_vba_read(int ifd, cli_ctx *ctx)
{
    char *dir;

    /* Create a directory to store the extracted OLE2 objects */
    dir = cli_gentemp_with_prefix(ctx ? ctx->sub_tmpdir : NULL, "ppt-ole2-tmp");
    if (dir == NULL)
        return NULL;

    if (mkdir(dir, 0700)) {
        cli_errmsg("cli_ppt_vba_read: Can't create temporary directory %s\n", dir);
        free(dir);
        return NULL;
    }

    if (ppt_stream_iter(ifd, dir) == NULL) {
        /* Nothing usable was extracted: do not leave the directory behind */
        cli_rmdirs(dir);
        free(dir);
        return NULL;
    }

    return dir;
}
/*
* Word 6 macros
*/
/*
 * Location of the macro area of a Word document.
 * word_read_fib() fills in macro_offset and macro_len from the two 32-bit
 * values stored at file offset 0x118.
 */
typedef struct {
/* Not referenced by any of the code visible in this part of the file */
unsigned char unused[12];
/* File offset of the macro area; cli_wm_readdir() seeks to macro_offset + 1 */
uint32_t macro_offset;
/* Length of the macro area in bytes; 0 is treated as "no macros" */
uint32_t macro_len;
} mso_fib_t;
/*
 * One macro as decoded by word_read_macro_entry(); all values are already
 * converted with vba_endian_convert_32() where applicable.
 */
typedef struct macro_entry_tag {
/* Length of the macro */
uint32_t len;
/* Offset of the macro */
uint32_t offset;
/* Per-macro key byte; presumably the XOR key later given to cli_wm_decrypt_macro() - confirm against the caller */
unsigned char key;
} macro_entry_t;
/*
 * Table of macros read by word_read_macro_info().
 */
typedef struct macro_info_tag {
/* Array of count entries allocated by word_read_macro_info() */
struct macro_entry_tag *entries;
/* Number of elements in entries; 0 means no table is available */
uint16_t count;
} macro_info_t;
/*
 * Read the position of the macro area from a Word document.
 *
 * Two consecutive 32-bit values (offset, then length) are read from file
 * offset 0x118 and stored in fib after vba_endian_convert_32(..., FALSE).
 *
 * Returns TRUE on success, FALSE if the seek or the read fails.
 */
static int
word_read_fib(int fd, mso_fib_t *fib)
{
    uint32_t raw[2]; /* [0] = macro offset, [1] = macro length */

    if (!seekandread(fd, 0x118, SEEK_SET, raw, sizeof(raw))) {
        cli_dbgmsg("read word_fib failed\n");
        return FALSE;
    }

    fib->macro_offset = vba_endian_convert_32(raw[0], FALSE);
    fib->macro_len    = vba_endian_convert_32(raw[1], FALSE);

    return TRUE;
}
/*
 * Read macro_info->count macro records from fd and decode them into
 * macro_info->entries, which the caller must have allocated with room for
 * that many elements.
 *
 * The records are read in one go into a temporary array of the packed
 * on-disk structure; only key, len and offset are kept.
 *
 * Returns TRUE on success (including a count of 0), FALSE if the temporary
 * buffer cannot be allocated or the read comes up short.
 */
static int
word_read_macro_entry(int fd, macro_info_t *macro_info)
{
    const uint16_t total = macro_info->count;
    macro_entry_t *dst;
    size_t nbytes;
    uint16_t i;
#ifdef HAVE_PRAGMA_PACK
#pragma pack(1)
#endif
#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack 1
#endif
    struct macro {
        unsigned char version;
        unsigned char key;
        unsigned char ignore[10];
        uint32_t len __attribute__((packed));
        uint32_t state __attribute__((packed));
        uint32_t offset __attribute__((packed));
    } * raw;
    const struct macro *src;
#ifdef HAVE_PRAGMA_PACK
#pragma pack()
#endif
#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack
#endif

    if (total == 0)
        return TRUE;

    nbytes = total * sizeof(struct macro);
    raw    = cli_malloc(nbytes);
    if (raw == NULL) {
        cli_errmsg("word_read_macro_entry: Unable to allocate memory for 'm'\n");
        return FALSE;
    }

    if (cli_readn(fd, raw, nbytes) != nbytes) {
        free(raw);
        cli_warnmsg("read %u macro_entries failed\n", total);
        return FALSE;
    }

    dst = macro_info->entries;
    src = raw;
    for (i = 0; i < total; i++) {
        dst[i].key    = src[i].key;
        dst[i].len    = vba_endian_convert_32(src[i].len, FALSE);
        dst[i].offset = vba_endian_convert_32(src[i].offset, FALSE);
    }

    free(raw);
    return TRUE;
}
/*
 * Read the macro table: a 16-bit entry count followed by that many macro
 * records (decoded by word_read_macro_entry()).
 *
 * On success macro_info->entries points to a newly allocated array of
 * macro_info->count elements, owned by the caller, and macro_info is
 * returned.
 *
 * On failure, or when the table is empty, NULL is returned and
 * macro_info->count is 0. If the array had already been allocated it is
 * freed and macro_info->entries is reset to NULL so that no dangling pointer
 * is left behind for the caller.
 */
static macro_info_t *
word_read_macro_info(int fd, macro_info_t *macro_info)
{
    if (!read_uint16(fd, &macro_info->count, FALSE)) {
        cli_dbgmsg("read macro_info failed\n");
        macro_info->count = 0;
        return NULL;
    }
    cli_dbgmsg("macro count: %d\n", macro_info->count);
    if (macro_info->count == 0)
        return NULL;

    macro_info->entries = (macro_entry_t *)cli_malloc(sizeof(macro_entry_t) * macro_info->count);
    if (macro_info->entries == NULL) {
        macro_info->count = 0;
        cli_errmsg("word_read_macro_info: Unable to allocate memory for macro_info->entries\n");
        return NULL;
    }

    if (!word_read_macro_entry(fd, macro_info)) {
        free(macro_info->entries);
        macro_info->entries = NULL; /* never hand back a freed pointer */
        macro_info->count   = 0;
        return NULL;
    }

    return macro_info;
}
/*
 * Skip over a record of type 0x03 (see the dispatch in cli_wm_readdir()).
 *
 * Layout as consumed here:
 *   - 1 byte: number of 14-byte entries, which are skipped
 *   - 1 byte: second count; if it is 0, two more bytes are read and, when
 *     the first of them is 2, the second one is used as the count instead.
 *     When the first is not 2 the two bytes belong to whatever follows, so
 *     the file position is moved back over them.
 *   - if the second count is non-zero, count * 4 + 1 bytes are skipped
 *
 * Returns TRUE when the record was skipped, FALSE on any read or seek error.
 */
static int
word_skip_oxo3(int fd)
{
    uint8_t records1, records2;

    if (cli_readn(fd, &records1, 1) != 1) {
        cli_dbgmsg("read oxo3 record1 failed\n");
        return FALSE;
    }
    cli_dbgmsg("oxo3 records1: %d\n", records1);

    if (!seekandread(fd, records1 * 14, SEEK_CUR, &records2, 1)) {
        cli_dbgmsg("read oxo3 record2 failed\n");
        return FALSE;
    }

    if (records2 == 0) {
        uint8_t twobytes[2];

        if (cli_readn(fd, twobytes, 2) != 2) {
            cli_dbgmsg("read oxo3 failed\n");
            return FALSE;
        }
        if (twobytes[0] != 2) {
            /*
             * Not ours: give the two bytes back. If that fails the
             * position is off by two and the caller would misparse the
             * stream, so report the error instead of ignoring it.
             */
            if (lseek(fd, -2, SEEK_CUR) == -1) {
                cli_dbgmsg("lseek oxo3 failed\n");
                return FALSE;
            }
            return TRUE;
        }
        records2 = twobytes[1];
    }

    if (records2 > 0) {
        if (lseek(fd, (records2 * 4) + 1, SEEK_CUR) == -1) {
            cli_dbgmsg("lseek oxo3 failed\n");
            return FALSE;
        }
    }
    cli_dbgmsg("oxo3 records2: %d\n", records2);

    return TRUE;
}
/*
 * Skip over a menu info record: a 16-bit entry count followed by
 * count entries of 12 bytes each.
 *
 * Returns TRUE on success, FALSE if the count cannot be read or the seek
 * fails.
 */
static int
word_skip_menu_info(int fd)
{
    uint16_t entries;

    if (!read_uint16(fd, &entries, FALSE)) {
        cli_dbgmsg("read menu_info failed\n");
        return FALSE;
    }
    cli_dbgmsg("menu_info count: %d\n", entries);

    /* Nothing to step over when the table is empty */
    if (entries == 0)
        return TRUE;

    return (lseek(fd, entries * 12, SEEK_CUR) == -1) ? FALSE : TRUE;
}
/*
 * Skip over the table of external macro names.
 *
 * The table starts with a signed 16-bit size. A size of -1 flags Unicode
 * names and is followed by the real size. Each entry is a one byte length,
 * the name itself (length bytes, or length * 2 + 1 bytes for Unicode) and a
 * 16-bit "numref" value.
 *
 * The remaining byte budget is reduced by what each entry actually occupies,
 * so tables with more than one entry are skipped completely. Previously the
 * whole table size was subtracted after the first entry, which ended the
 * loop after a single name.
 *
 * NOTE(review): it is not visible from this code whether the stored size
 * also counts the size field itself. The loop therefore stops as soon as
 * fewer bytes remain than the smallest possible entry, which gives the same
 * result for both readings - confirm against the file format.
 *
 * Returns TRUE on success, FALSE on a read or seek error.
 */
static int
word_skip_macro_extnames(int fd)
{
    int is_unicode = 0;
    int nbytes, min_entry;
    int16_t size;

    if (!read_uint16(fd, (uint16_t *)&size, FALSE)) {
        cli_dbgmsg("read macro_extnames failed\n");
        return FALSE;
    }
    if (size == -1) { /* Unicode flag */
        if (!read_uint16(fd, (uint16_t *)&size, FALSE)) {
            cli_dbgmsg("read macro_extnames failed\n");
            return FALSE;
        }
        is_unicode = 1;
    }
    cli_dbgmsg("ext names size: 0x%x\n", size);

    /* Length byte + empty name (one extra byte for Unicode) + numref */
    min_entry = 1 + (is_unicode ? 1 : 0) + (int)sizeof(uint16_t);

    nbytes = size;
    while (nbytes >= min_entry) {
        uint8_t length;
        off_t offset;

        if (cli_readn(fd, &length, 1) != 1) {
            cli_dbgmsg("read macro_extnames failed\n");
            return FALSE;
        }

        if (is_unicode)
            offset = (off_t)length * 2 + 1;
        else
            offset = (off_t)length;

        /* ignore numref as well */
        if (lseek(fd, offset + (off_t)sizeof(uint16_t), SEEK_CUR) == -1) {
            cli_dbgmsg("read macro_extnames failed to seek\n");
            return FALSE;
        }

        /* Account for the length byte, the name and the numref */
        nbytes -= 1 + (int)offset + (int)sizeof(uint16_t);
    }

    return TRUE;
}
/*
 * Skip over the table of internal macro names: a 16-bit count followed by
 * count entries, each made of a 16-bit id, a one byte name length, the name
 * and one more byte.
 *
 * Returns TRUE on success, FALSE on a read or seek error.
 */
static int
word_skip_macro_intnames(int fd)
{
    uint16_t count;
    unsigned int remaining;

    if (!read_uint16(fd, &count, FALSE)) {
        cli_dbgmsg("read macro_intnames failed\n");
        return FALSE;
    }
    cli_dbgmsg("intnames count: %u\n", (unsigned int)count);

    for (remaining = count; remaining != 0; remaining--) {
        uint8_t namelen;

        /* Step over the id and fetch the length of the name */
        if (!seekandread(fd, sizeof(uint16_t), SEEK_CUR, &namelen, sizeof(uint8_t))) {
            cli_dbgmsg("skip_macro_intnames failed\n");
            return FALSE;
        }

        /* Internal name, plus one byte of unknown data */
        if (lseek(fd, namelen + 1, SEEK_CUR) == -1) {
            cli_dbgmsg("skip_macro_intnames failed\n");
            return FALSE;
        }
    }

    return TRUE;
}
vba_project_t *
cli_wm_readdir(int fd)
{
int done;
off_t end_offset;
unsigned char info_id;
macro_info_t macro_info;
vba_project_t *vba_project;
mso_fib_t fib;
if (!word_read_fib(fd, &fib))
return NULL;
if (fib.macro_len == 0) {
cli_dbgmsg("wm_readdir: No macros detected\n");
/* Must be clean */
return NULL;
}
cli_dbgmsg("wm_readdir: macro offset: 0x%.4x\n", (int)fib.macro_offset);
cli_dbgmsg("wm_readdir: macro len: 0x%.4x\n\n", (int)fib.macro_len);
/* Go one past the start to ignore start_id */
if (lseek(fd, fib.macro_offset + 1, SEEK_SET) != (off_t)(fib.macro_offset + 1)) {
cli_dbgmsg("wm_readdir: lseek macro_offset failed\n");
return NULL;
}
end_offset = fib.macro_offset + fib.macro_len;
done = FALSE;
macro_info.entries = NULL;
macro_info.count = 0;
while ((lseek(fd, 0, SEEK_CUR) < end_offset) && !done) {
if (cli_readn(fd, &info_id, 1) != 1) {
cli_dbgmsg("wm_readdir: read macro_info failed\n");
break;
}
switch (info_id) {
case 0x01:
if (macro_info.count)
free(macro_info.entries);
word_read_macro_info(fd, &macro_info);
done = TRUE;
break;
case 0x03:
if (!word_skip_oxo3(fd))
done = TRUE;
break;
case 0x05:
if (!word_skip_menu_info(fd))
done = TRUE;
break;
case 0x10:
if (!word_skip_macro_extnames(fd))
done = TRUE;
break;
case 0x11:
if (!word_skip_macro_intnames(fd))
done = TRUE;
break;
case 0x40: /* end marker */
case 0x12: /* ??? */
done = TRUE;
break;
default:
cli_dbgmsg("wm_readdir: unknown type: 0x%x\n", info_id);
done = TRUE;
}
}
if (macro_info.count == 0)
return NULL;
vba_project = create_vba_project(macro_info.count, "", NULL);
if (vba_project) {
Code cleanup: Refactor to clean up formatting issues Refactored the clamscan code that determines 'what to scan' in order to clean up some very messy logic and also to get around a difference in how vscode and clang-format handle formatting #ifdef blocks in the middle of an else/if. In addition to refactoring, there is a slight behavior improvement. With this change, doing `clamscan blah -` will now scan `blah` and then also scan `stdin`. You can even do `clamscan - blah` to now scan `stdin` and then scan `blah`. Before, The `-` had to be the only "filename" argument in order to scan from stdin. In addition, added a bunch of extra empty lines or changing multi-line function calls to single-line function calls in order to get around a bug in clang-format with these two options do not playing nice together: - AlignConsecutiveAssignments: true - AlignAfterOpenBracket: true AlignAfterOpenBracket is not taking account the spaces inserted by AlignConsecutiveAssignments, so you end up with stuff like this: ```c bleeblah = 1; blah = function(arg1, arg2, arg3); // ^--- these args 4-left from where they should be. ``` VSCode, meanwhile, somehow fixes this whitespace issue so code that is correctly formatted by VSCode doesn't have this bug, meaning that: 1. The clang-format check in GH Actions fails. 2. We'd all have to stop using format-on-save in VSCode and accept the bug if we wanted those GH Actions tests to pass. Adding an empty line before variable assignments from multi-line function calls evades the buggy behavior. This commit should resolve the clang-format github action test failures, for now.
2022-03-10 20:55:13 -08:00
vba_project->length = (uint32_t *)cli_malloc(sizeof(uint32_t) * macro_info.count);
vba_project->key = (unsigned char *)cli_malloc(sizeof(unsigned char) * macro_info.count);
if ((vba_project->length != NULL) &&
(vba_project->key != NULL)) {
int i;
const macro_entry_t *m = macro_info.entries;
for (i = 0; i < macro_info.count; i++) {
vba_project->offset[i] = m->offset;
vba_project->length[i] = m->len;
vba_project->key[i] = m->key;
m++;
}
} else {
cli_errmsg("cli_wm_readdir: Unable to allocate memory for vba_project\n");
free(vba_project->name);
free(vba_project->colls);
free(vba_project->dir);
free(vba_project->offset);
if (vba_project->length)
free(vba_project->length);
if (vba_project->key)
free(vba_project->key);
free(vba_project);
vba_project = NULL;
}
}
free(macro_info.entries);
return vba_project;
}
/**
 * @brief Read a macro from a file and undo its single-byte XOR encoding.
 *
 * @param fd      Descriptor to read from; a negative value is rejected.
 * @param offset  Absolute file offset of the macro.
 * @param len     Number of bytes to read; 0 is rejected.
 * @param key     Byte every data byte is XORed with. 0 leaves the data as
 *                read.
 * @return        Newly allocated buffer of len bytes that the caller must
 *                free, or NULL on invalid arguments, allocation failure, or
 *                a failed seek/read.
 */
unsigned char *
cli_wm_decrypt_macro(int fd, off_t offset, uint32_t len, unsigned char key)
{
    unsigned char *buff;

    if (len == 0)
        return NULL;

    if (fd < 0)
        return NULL;

    buff = (unsigned char *)cli_malloc(len);
    if (buff == NULL) {
        cli_errmsg("cli_wm_decrypt_macro: Unable to allocate memory for buff\n");
        return NULL;
    }

    if (!seekandread(fd, offset, SEEK_SET, buff, len)) {
        free(buff);
        return NULL;
    }

    if (key) {
        uint32_t i;

        for (i = 0; i < len; i++)
            buff[i] ^= key;
    }

    return buff;
}
/**
* @brief Keep reading bytes until we reach a NUL.
*
* @param fd File descriptor
* @return int Returns FALSE if none is found, else TRUE
*/
Improve tmp sub-directory names At present many parsers create tmp subdirectories to store extracted files. For parsers like the vba parser, this is required as the directory is later scanned. For other parsers, these subdirectories are probably not helpful now that we provide recursive sub-dirs when --leave-temps is enabled. It's not quite as simple as removing the extra subdirectories, however. Certain parsers, like autoit, don't create very unique filenames and would result in file name collisions when --leave-temps is not enabled. The best thing to do would be to make sure each parser uses unique filenames and doesn't rely on cli_magic_scan_dir() to scan extracted content before removing the extra subdirectory. In the meantime, this commit gives the extra subdirectories meaningful names to improve readability. This commit also: - Provides the 'bmp' prefix for extracted PE icons. - Removes empty tmp subdirs when extracting rtf files, to eliminate clutter. - The PDF parser sometimes creates tmp files when decompressing streams before it knows if there is actually any content to decompress. This resulted in a large number of empty files. While it would be best to avoid creating empty files in the first place, that's not quite as as it sounds. This commit does the next best thing and deletes the tmp files if nothing was actually extracted, even if --leave-temps is enabled. - Removes the "scantemp" prefix for unnamed fmaps scanned with cli_magic_scan(). The 5-character hashes given to tmp files with prefixes resulted in occasional file name collisions when extracting certain file types with thousands of embedded files. - The VBA and TAR parsers mistakenly used NAME_MAX instead of PATH_MAX, resulting in truncated file paths and failed extraction when --leave-temps is enabled and a lot of recursion is in play. This commit switches them from NAME_MAX to PATH_MAX.
2020-03-27 16:06:22 -04:00
static int skip_past_nul(int fd)
{
char *end;
char smallbuf[128];
do {
size_t nread = cli_readn(fd, smallbuf, sizeof(smallbuf));
if ((nread == 0) || (nread == (size_t)-1))
return FALSE;
end = memchr(smallbuf, '\0', nread);
if (end) {
if (lseek(fd, 1 + (end - smallbuf) - (off_t)nread, SEEK_CUR) < 0)
return FALSE;
return TRUE;
}
} while (1);
}
/*
* Read 2 bytes as a 16-bit number, host byte order. Return success or fail
*/
static int
2007-12-21 11:17:25 +00:00
read_uint16(int fd, uint16_t *u, int big_endian)
{
if (cli_readn(fd, u, sizeof(uint16_t)) != sizeof(uint16_t))
return FALSE;
*u = vba_endian_convert_16(*u, big_endian);
return TRUE;
}
/*
* Read 4 bytes as a 32-bit number, host byte order. Return success or fail
*/
static int
2007-12-21 11:17:25 +00:00
read_uint32(int fd, uint32_t *u, int big_endian)
{
if (cli_readn(fd, u, sizeof(uint32_t)) != sizeof(uint32_t))
return FALSE;
*u = vba_endian_convert_32(*u, big_endian);
return TRUE;
}
/*
 * Reposition fd with lseek(offset, whence), then read exactly len bytes
 * into data.
 *
 * Returns non-zero if the seek succeeded and all len bytes were read,
 * FALSE otherwise.
 */
static int
seekandread(int fd, off_t offset, int whence, void *data, size_t len)
{
    const off_t pos = lseek(fd, offset, whence);

    if (pos == (off_t)-1) {
        cli_dbgmsg("lseek failed\n");
        return FALSE;
    }

    return cli_readn(fd, data, len) == len;
}
/*
 * Create and initialise a vba_project structure
 *
 * The structure itself is zero-filled. The name, colls and offset arrays
 * are allocated with room for record_count elements (their contents are not
 * initialised), and dir is duplicated. The length and key members stay NULL.
 *
 * Returns the new project, or NULL if any allocation fails; in that case
 * everything allocated so far is released again.
 */
static vba_project_t *
create_vba_project(int record_count, const char *dir, struct uniq *U)
{
    vba_project_t *project = (vba_project_t *)cli_calloc(1, sizeof(struct vba_project_tag));

    if (!project) {
        cli_errmsg("create_vba_project: Unable to allocate memory for vba project structure\n");
        return NULL;
    }

    project->name   = (char **)cli_malloc(sizeof(char *) * record_count);
    project->colls  = (uint32_t *)cli_malloc(sizeof(uint32_t) * record_count);
    project->dir    = cli_strdup(dir);
    project->offset = (uint32_t *)cli_malloc(sizeof(uint32_t) * record_count);

    if (!project->colls || !project->name || !project->dir || !project->offset) {
        /* Members that were never allocated are still NULL from the calloc */
        cli_free_vba_project(project);
        cli_errmsg("create_vba_project: Unable to allocate memory for vba project elements\n");
        return NULL;
    }

    project->count = record_count;
    project->U     = U;

    return project;
}
/**
 * @brief Free up the memory associated with the vba_project_t type.
 *
 * Releases the dir, colls, name, offset, length and key members and then the
 * structure itself. The strings referenced from the name array are not
 * freed here. Passing NULL is allowed and does nothing.
 *
 * @param vba_project A vba_project_t type allocated by one of these:
 *  - create_vba_project()
 *  - cli_wm_readdir()
 *  - cli_vba_readdir()
 */
void cli_free_vba_project(vba_project_t *vba_project)
{
    if (vba_project == NULL)
        return;

    /* free() accepts NULL, so members that were never set need no guard */
    free(vba_project->dir);
    free(vba_project->colls);
    free(vba_project->name);
    free(vba_project->offset);
    free(vba_project->length);
    free(vba_project->key);
    free(vba_project);
}