clamav/unit_tests/check_str.c
Jonas Zaddach (jzaddach) d5a733ef90 XLM (Excel 4.0) macro detection and extraction
XLM is a macro language in Excel that was used before VBA (before
1996). It is still parsed and executed by modern Excel and is gaining
popularity with malware authors.

This patch adds rudimentary support for detecting and extracting
Excel 4.0 (XLM) macros.

The code is based on Didier Stevens' plugin_biff for oletools.py.
2020-04-29 14:19:41 -07:00

286 lines
8.3 KiB
C

/*
* Unit tests for string functions.
*
* Copyright (C) 2013-2020 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
* Copyright (C) 2008-2013 Sourcefire, Inc.
*
* Authors: Török Edvin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <check.h>
#include "../libclamav/clamav.h"
#include "../libclamav/others.h"
#include "../libclamav/str.h"
#include "../libclamav/entconv.h"
#include "../libclamav/mbox.h"
#include "../libclamav/message.h"
#include "../libclamav/jsparse/textbuf.h"
#include "checks.h"
/*
 * cli_unescape() on input without any escape sequences: the result must be a
 * non-NULL copy that compares equal to the input.
 */
START_TEST(test_unescape_simple)
{
    char *unescaped;

    /* Empty input must give back an empty (but allocated) string. */
    unescaped = cli_unescape("");
    ck_assert_msg(unescaped != NULL && unescaped[0] == '\0', "cli_unescape empty string");
    free(unescaped);

    unescaped = cli_unescape("1");
    ck_assert_msg(unescaped != NULL && strcmp(unescaped, "1") == 0, "cli_unescape one char");
    free(unescaped);

    unescaped = cli_unescape("tesT");
    ck_assert_msg(unescaped != NULL && strcmp(unescaped, "tesT") == 0, "cli_unescape simple string");
    free(unescaped);
}
END_TEST
/*
 * cli_unescape() on "%XX" hex escapes: a complete escape is replaced by the
 * byte it names, while an incomplete escape at the end of the input is
 * expected to be copied through literally.
 */
START_TEST(test_unescape_hex)
{
    char *str = cli_unescape("%5a");
    ck_assert_msg(str && !strcmp(str, "\x5a"), "cli_unescape hex");
    free(str);

    /* "%8" carries only one hex digit, so it must survive unchanged. */
    str = cli_unescape("%b5%8");
    ck_assert_msg(str && !strcmp(str, "\xb5%8"), "cli_unescape truncated");
    free(str);

    /* A lone trailing '%' must survive unchanged as well. */
    str = cli_unescape("%b5%");
    ck_assert_msg(str && !strcmp(str, "\xb5%"), "cli_unescape truncated/2");
    free(str);

    /* "%00" is expected to decode to the byte 0x01 rather than to a NUL byte. */
    str = cli_unescape("%00");
    /*
     * The message is interpreted as a printf-style format string, so the
     * literal percent sign has to be written as "%%"; a bare "%00" would be
     * an incomplete conversion specification.
     */
    ck_assert_msg(str && !strcmp(str, "\x1"), "cli_unescape %%00");
    free(str);
}
END_TEST
/*
 * cli_unescape() on "%uXXXX" escapes: each complete escape is expected to be
 * replaced by the UTF-8 encoding of the code point, and escapes with fewer
 * than four hex digits are expected to be left untouched.
 */
START_TEST(test_unescape_unicode)
{
    char *utf8;

    /* U+05D0 (Hebrew aleph) encodes to two UTF-8 bytes. */
    utf8 = cli_unescape("%u05D0");
    ck_assert_msg(utf8 != NULL && strcmp(utf8, "\xd7\x90") == 0, "cli_unescape unicode aleph");
    free(utf8);

    /* Code points on both sides of the 1-, 2- and 3-byte UTF-8 boundaries. */
    utf8 = cli_unescape("%u00a2%u007f%u0080%u07ff%u0800%ue000");
    ck_assert_msg(utf8 != NULL &&
                      strcmp(utf8, "\xc2\xa2\x7f\xc2\x80\xdf\xbf\xe0\xa0\x80\xee\x80\x80") == 0,
                  "cli_unescape utf-8 test");
    free(utf8);

    /* Only the final, complete "%u1234" may be converted. */
    utf8 = cli_unescape("%%u123%u12%u1%u%u1234");
    ck_assert_msg(utf8 != NULL && strcmp(utf8, "%%u123%u12%u1%u\xe1\x88\xb4") == 0,
                  "cli_unescape unicode truncated");
    free(utf8);
}
END_TEST
/*
 * Text buffer shared by the textbuf tests. buf_setup() zeroes it and
 * buf_teardown() frees its data; both are registered as the checked fixture
 * of the "jsnorm textbuf functions" test case.
 */
static struct text_buffer buf;
/* Fixture setup: start from an all-zero shared text buffer. */
static void buf_setup(void)
{
    memset(&buf, 0, sizeof buf);
}
/*
 * Fixture teardown: releases whatever data the test left in the shared text
 * buffer and zeroes the structure again.
 */
static void buf_teardown(void)
{
    /* free(NULL) is a no-op, so no NULL check is needed here. */
    free(buf.data);
    memset(&buf, 0, sizeof(buf));
}
/*
 * textbuffer_append_len(): a short append must succeed and store the bytes,
 * and a request of CLI_MAX_ALLOCATION bytes must be rejected without
 * disturbing what is already stored.
 */
START_TEST(test_append_len)
{
    /* Append only the first three bytes of "test". */
    ck_assert_msg(textbuffer_append_len(&buf, "test", 3) != -1, "tbuf append");
    ck_assert_msg(buf.data != NULL && strncmp(buf.data, "tes", 3) == 0, "textbuffer_append_len");

    /* NOTE(review): presumably announces that the next call is expected to log an error. */
    errmsg_expected();
    ck_assert_msg(textbuffer_append_len(&buf, "test", CLI_MAX_ALLOCATION) == -1, "tbuf append");

    /* The earlier contents must still be there after the failed append. */
    ck_assert_msg(buf.data != NULL && strncmp(buf.data, "tes", 3) == 0, "textbuffer_append_len");
}
END_TEST
/* textbuffer_append(): appending a C string stores exactly that string. */
START_TEST(test_append)
{
    ck_assert_msg(textbuffer_append(&buf, "test") != -1, "tbuf append");

    /* Add a terminating NUL so the buffer can be compared with strcmp(). */
    ck_assert_msg(textbuffer_putc(&buf, '\0') != -1, "tbuf putc");

    ck_assert_msg(buf.data != NULL && strcmp(buf.data, "test") == 0, "textbuffer_append");
}
END_TEST
START_TEST(test_putc)
{
ck_assert_msg(textbuffer_putc(&buf, '\x5a') != -1, "tbuf putc");
ck_assert_msg(buf.data && buf.data[0] == '\x5a', "textbuffer_putc");
}
END_TEST
/*
 * cli_textbuffer_append_normalize(): backslash escapes in the input are
 * expected to be replaced as follows - "\\0" by the byte 0x01, the
 * single-letter escapes b, t, n, v, f and r by their control characters, an
 * unrecognised escape ("\\z") by the bare letter, "\\x2a" by the byte 0x2a
 * and "\\u1234" by the UTF-8 encoding of U+1234. Plain text is kept.
 */
START_TEST(test_normalize)
{
    const char *escaped = "test\\0\\b\\t\\n\\v\\f\\r\\z\\x2a\\u1234test";
    const char *want    = "test\x1\b\t\n\v\f\rz\x2a\xe1\x88\xb4test";
    int rc              = cli_textbuffer_append_normalize(&buf, escaped, strlen(escaped));

    ck_assert_msg(rc != -1, "normalize");

    /* NUL-terminate the buffer so it can be compared as a C string. */
    ck_assert_msg(textbuffer_putc(&buf, '\0') != -1, "putc \\0");
    ck_assert_msg(buf.data != NULL && strcmp(buf.data, want) == 0, "normalized text");
}
END_TEST
/*
 * cli_hex2str(): a valid hex string decodes to the corresponding bytes
 * (including an embedded zero byte), and a string containing a non-hex
 * character is rejected with NULL.
 */
START_TEST(hex2str)
{
    const char valid_hex[]   = "a00026";
    const char valid_bytes[] = "\xa0\x00\x26";
    const char invalid_hex[] = "ag0026";
    char *decoded;

    decoded = cli_hex2str(valid_hex);
    ck_assert_msg(decoded != NULL, "cli_hex2str NULL");
    /* memcmp, not strcmp: the expected output contains a zero byte. */
    ck_assert_msg(memcmp(decoded, valid_bytes, sizeof(valid_bytes) - 1) == 0,
                  "cli_hex2str invalid output");
    free(decoded);

    decoded = cli_hex2str(invalid_hex);
    ck_assert_msg(decoded == NULL, "cli_hex2str on invalid input");
}
END_TEST
/*
 * Cases for test_base64, which is run once per entry.
 */
static struct base64lines {
/* base64 text handed to decodeLine() */
const char *line;
/* bytes the decoder is expected to produce */
const char *decoded;
/* expected number of decoded bytes */
unsigned int len;
} base64tests[] = {
/* correctly padded input, from the empty string up to two full groups */
{"", "", 0},
{"Zg==", "f", 1},
{"Zm8=", "fo", 2},
{"Zm9v", "foo", 3},
{"Zm9vYg==", "foob", 4},
{"Zm9vYmFy", "foobar", 6},
/* same data with the trailing '=' padding left out; must decode identically */
{"Zg", "f", 1},
{"Zm8", "fo", 2},
{"Zm9vYg", "foob", 4}};
/*
 * Loop test over base64tests[]: decodes one base64 line, flushes any pending
 * decoder state, and checks both the length and the contents of the output.
 */
START_TEST(test_base64)
{
    const struct base64lines *tc = &base64tests[_i];
    /* Named "decoded" so it does not hide the file-scope text buffer "buf". */
    unsigned char decoded[1024];
    unsigned char *line_end, *out_end;
    unsigned decoded_len;
    message *m;

    m = messageCreate();
    ck_assert_msg(m != NULL, "Unable to create message");

    line_end = decodeLine(m, BASE64, tc->line, decoded, sizeof(decoded));
    ck_assert_msg(line_end != NULL, "unable to decode line");

    /* If the flush returns NULL, the output ends where decodeLine() stopped. */
    out_end = base64Flush(m, line_end);
    if (out_end == NULL)
        out_end = line_end;

    /* Terminate the output so it can be printed with %s on failure. */
    *out_end    = '\0';
    decoded_len = out_end - decoded;

    ck_assert_msg(decoded_len == tc->len, "invalid base64 decoded length: %u expected %u (%s)\n",
                  decoded_len, tc->len, decoded);
    ck_assert_msg(memcmp(decoded, tc->decoded, tc->len) == 0,
                  "invalid base64 decoded data: %s, expected:%s\n",
                  decoded, tc->decoded);

    messageDestroy(m);
}
END_TEST
/*
 * Cases for test_u16_u8, which is run once per entry and converts the input
 * as little-endian UTF-16 (E_UTF16_LE).
 */
static struct {
/* UTF-16 input bytes; u16_len() finds the end at a pair of zero bytes, the
 * second of which is the string literal's implicit terminator */
const char *u16;
/* expected UTF-8 output, compared with strcmp() */
const char *u8;
} u16_tests[] = {
/* plain ASCII text */
{"\x74\x00\x65\x00\x73\x00\x74\x00\x00\x00", "test"},
/* FF FE (the UTF-16LE byte order mark) alone: expected to give an empty string */
{"\xff\xfe\x00", ""},
/* first and last code points needing two UTF-8 bytes (U+0080, U+07FF) */
{"\x80\x00\x00", "\xc2\x80"},
{"\xff\x07\x00", "\xdf\xbf"},
/* three-byte UTF-8 range: values at the lead-byte boundaries and on both
 * sides of the surrogate block (U+0800 ... U+FFFF) */
{"\x00\x08\x00", "\xe0\xa0\x80"},
{"\xff\x0f\x00", "\xe0\xbf\xbf"},
{"\x00\x10\x00", "\xe1\x80\x80"},
{"\xff\xcf\x00", "\xec\xbf\xbf"},
{"\x00\xd0\x00", "\xed\x80\x80"},
{"\xff\xd7\x00", "\xed\x9f\xbf"},
{"\x00\xe0\x00", "\xee\x80\x80"},
{"\xff\xff\x00", "\xef\xbf\xbf"},
/* surrogate pairs (high unit first), giving four-byte UTF-8 sequences
 * from U+10000 up to U+10FFFF */
{"\x00\xd8\x00\xdc\x00", "\xf0\x90\x80\x80"},
{"\xbf\xd8\xff\xdf\x00", "\xf0\xbf\xbf\xbf"},
{"\xc0\xd8\x00\xdc\x00", "\xf1\x80\x80\x80"},
{"\xbf\xdb\xff\xdf\x00", "\xf3\xbf\xbf\xbf"},
{"\xc0\xdb\x00\xdc\x00", "\xf4\x80\x80\x80"},
{"\xff\xdb\xff\xdf\x00", "\xf4\x8f\xbf\xbf"},
/* surrogates in the wrong order (low unit first): each unit is expected to
 * come out as EF BF BD, the UTF-8 form of U+FFFD */
{"\x00\xdc\x00\xd8\x00", "\xef\xbf\xbd\xef\xbf\xbd"}};
/*
 * Returns the length in bytes of a UTF-16 string, not counting its
 * terminator. The string is scanned two bytes at a time and ends at the
 * first 2-byte unit whose bytes are both zero.
 */
static unsigned u16_len(const char *s)
{
    unsigned nbytes = 0;

    while (s[nbytes] != '\0' || s[nbytes + 1] != '\0')
        nbytes += 2;

    return nbytes;
}
/*
 * Loop test over u16_tests[]: converts one little-endian UTF-16 input with
 * cli_utf16_to_utf8() and compares the result with the expected UTF-8 string.
 */
START_TEST(test_u16_u8)
{
    const char *utf16 = u16_tests[_i].u16;
    const char *want  = u16_tests[_i].u8;
    char *got;

    got = cli_utf16_to_utf8(utf16, u16_len(utf16), E_UTF16_LE);
    ck_assert_msg(got != NULL, "cli_utf16_to_utf8 non-null");
    ck_assert_msg(strcmp(got, want) == 0, "utf16_to_8 %d failed, expected: %s, got %s", _i, want, got);
    free(got);
}
END_TEST
/*
 * Builds the "str" suite. It contains four test cases:
 *   - "cli_unescape":             the cli_unescape() tests
 *   - "jsnorm textbuf functions": text buffer tests, run with the
 *                                 buf_setup()/buf_teardown() checked fixture
 *   - "str functions":            hex2str plus one test_u16_u8 run per
 *                                 u16_tests[] entry
 *   - "decodeline":               one test_base64 run per base64tests[] entry
 *
 * Returns the newly created suite.
 */
Suite *test_str_suite(void)
{
    Suite *suite = suite_create("str");
    TCase *tc;

    tc = tcase_create("cli_unescape");
    suite_add_tcase(suite, tc);
    tcase_add_test(tc, test_unescape_simple);
    tcase_add_test(tc, test_unescape_unicode);
    tcase_add_test(tc, test_unescape_hex);

    tc = tcase_create("jsnorm textbuf functions");
    suite_add_tcase(suite, tc);
    tcase_add_checked_fixture(tc, buf_setup, buf_teardown);
    tcase_add_test(tc, test_append_len);
    tcase_add_test(tc, test_append);
    tcase_add_test(tc, test_putc);
    tcase_add_test(tc, test_normalize);

    tc = tcase_create("str functions");
    suite_add_tcase(suite, tc);
    tcase_add_test(tc, hex2str);
    /* The loop index _i runs over every entry of the table. */
    tcase_add_loop_test(tc, test_u16_u8, 0, sizeof(u16_tests) / sizeof(u16_tests[0]));

    tc = tcase_create("decodeline");
    suite_add_tcase(suite, tc);
    tcase_add_loop_test(tc, test_base64, 0, sizeof(base64tests) / sizeof(base64tests[0]));

    return suite;
}