Merge master to features/yara.

This commit is contained in:
Steven Morgan 2015-05-01 18:36:48 -04:00
commit a80453e6e9
66 changed files with 3142 additions and 1048 deletions

View file

@ -1,23 +0,0 @@
Copyright (c) 2001-2003 Allan Saddi <allan@saddi.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY ALLAN SADDI AND HIS CONTRIBUTORS ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL ALLAN SADDI OR HIS CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

203
ChangeLog
View file

@ -1,3 +1,206 @@
Mon, 27 Apr 12:00:00 EDT
-----------------------------------
* 0.98.7 Release.
Tue, 14 Apr 2015 15:53:17 EDT (klin)
-----------------------------------
* bb#11296 - various fixes to pdf string base64 string conversion
Mon, 13 Apr 2015 12:14:41 EDT (smorgan)
-----------------------------------
* bb11298 - look for TOC element name <unarchived-checksum>
(as a synonym for <extracted-checksum>). Continue processing rather
than exit in the event of missing or error in TOC checksum specification.
Wed, 8 Apr 2015 15:51:04 EDT (smorgan)
-----------------------------------
* iso9660: remove unnecessary parameter on iso_parse_dir() and reset return
code when scanall is in effect.
Wed, 1 Apr 2015 17:41:59 EDT (klin)
-----------------------------------
* pdf: correctly handle decoding, decryption, character set conversions,
and file properties collection(base64 encoded as needed).
Fri, 27 Mar 2015 13:21:49 EDT (klin)
-----------------------------------
* converted cb_file_props from using engine-based ctx to file-based ctx
Thu, 26 Mar 2015 12:24:02 EDT (smorgan)
-----------------------------------
* bb11281 - Reworked reverted upack.c crash patch to fix regression
false negatives.
Tue, 24 Mar 2015 12:06:57 EDT (klin)
-----------------------------------
* make check: added env check 'T' to set timeout
Mon, 23 Mar 2015 17:58:35 EDT (klin)
-----------------------------------
* bb#11282 - patch for code clean up in rebuildpe. Patch
supplied by Sebastian Andrzej Siewior.
Mon, 23 Mar 2015 13:04:54 EDT (klin)
-----------------------------------
* bb#11284 - fixed integer underflow in detecting W32.Polipos.A method.
Patch supplied by Sebastian Andrzej Siewior.
Mon, 16 Mar 2015 18:35:14 EDT (klin)
-----------------------------------
* updated documentation on document property collection
Mon, 16 Mar 2015 18:26:07 EDT (klin)
-----------------------------------
* added support for MS Office 2003 XML(msxml) document types and msxml
file properties collection.
Mon, 16 Mar 2015 13:11:56 EDT (klin)
-----------------------------------
* fixed coverity issue ID 12109: buffer was not freed on rare error case
Mon, 16 Mar 2015 13:08:03 EDT (klin)
-----------------------------------
* fixed coverity ID 12110 12111: changed the type of a value from unsigned
to signed due to possible negative values
Thu, 12 Mar 2015 19:06:23 EDT (smorgan)
-----------------------------------
* Fix for infinite loop on crafted xz file.
Wed, 11 Mar 2015 15:03:43 EDT (smorgan)
-----------------------------------
* bb11278 - was not detecting viruses on files inside iso9660.
Also fix up all-match logic.
Mon, 9 Mar 2015 13:02:25 EDT (smorgan)
-----------------------------------
* bb11274 - adds out of bounds check for petite packed files.
Patch from Sebastian Andrzej Siewior.
Wed, 4 Mar 2015 14:04:24 EDT (klin)
-----------------------------------
* updated example fileprop analysis bytecodes moved old example bytecodes
to examples/fileprop_analysis/old/
Wed, 4 Mar 2015 12:08:34 EDT (klin)
-----------------------------------
* backwards compatibility for target type 13 json scanning
Tue, 3 Mar 2015 17:47:55 EDT (klin)
-----------------------------------
* generates fmap from desc if map is NULL
Tue, 3 Mar 2015 16:37:08 EDT (smorgan)
-----------------------------------
* Apply y0da cryptor patch sent in by Sebastian Andrzej Siewior.
Tue, 3 Mar 2015 16:12:48 EDT (klin)
-----------------------------------
* flevel updated to 80 (new bytecode hook type)
Tue, 3 Mar 2015 16:12:22 EDT (klin)
-----------------------------------
* clambc info option updated for new hook type
Tue, 3 Mar 2015 15:00:41 EDT (klin)
-----------------------------------
* added BC_PRECLASS hook support; replaces target type 13
Mon, 2 Mar 2015 19:06:23 EDT (klin)
-----------------------------------
* pdf string UTF-16 conversion no longer solely depends on ICONV reason:
no ICONV meant no conversion even though conversion function existed
Fri, 27 Feb 2015 15:23:51 EDT (klin)
-----------------------------------
* bb#11269 - bm matcher no longer sets scanning window offset reason:
certain segments could be hashed multiple times
Wed, 25 Feb 2015 14:55:21 EDT (klin)
-----------------------------------
* bb#11269 - hash does not compute on segments smaller than the maxpatlen
Tue, 24 Feb 2015 16:21:09 EDT (klin)
-----------------------------------
* bb#11267 - libclamav upx cover against hand crafted section ove patch
supplied by Sebastian Andrzej Siewior.
Fri, 27 Feb 2015 16:57:19 EDT (smorgan)
-----------------------------------
* Patch for integer overflow checks for petite unpack code supplied by
Sebastian Andrzej Siewior.
Fri, 27 Feb 2015 16:54:55 EDT (smorgan)
-----------------------------------
* remove obsolete parameters from the clamd.conf man page: MailMaxRecursion,
ArchiveMaxFileSize, ArchiveMaxRecursion, ArchiveMaxFiles,
ArchiveMaxCompressionRatio, ArchiveBlockMax, ArchiveLimitMemoryUsage, Clamuko*.
Wed, 18 Feb 2015 15:23:54 EDT (klin)
-----------------------------------
* bb#11212 - fix MEW unpacker
Mon, 16 Feb 2015 11:46:21 EDT (smorgan)
-----------------------------------
* bb11264 - patch for 'possible' heap overflow submitted by the Debian team.
Tue, 10 Feb 2015 15:16:48 EDT (smorgan)
-----------------------------------
* bb11260: fix compile error when './configure --disable-pthreads' is specified.
Fri, 6 Feb 2015 14:59:43 EDT (klin)
-----------------------------------
* bb#11254 - removed built-in llvm configure check and added
--with-llvm-linking option to specify system-llvm linking method
Fri, 6 Feb 2015 13:22:35 EDT (klin)
-----------------------------------
* improved documentation on macro subsignatures
Wed, 4 Feb 2015 18:52:01 EDT (smorgan)
-----------------------------------
* fix documentation errors in example logical signature.
Fri, 30 Jan 2015 12:15:07 EDT (klin)
-----------------------------------
* bb#12887 - fixed an issue regarding (fd==-1) in WinAPI
Wed, 28 Jan 2015 11:20:35 EDT (klin)
-----------------------------------
* fixed Windows API SetOption/GetOption CLAM_LIMIT_RECURSION
Wed, 21 Jan 2015 11:41:07 EDT (klin)
-----------------------------------
* added ICONV to clamconf optional features report
Thu, 15 Jan 2015 15:15:01 EDT (klin)
-----------------------------------
* fixed an incorrect return value for magic_scandesc
Wed, 14 Jan 2015 09:25:47 EDT (klin)
-----------------------------------
* cleaned up configure help strings by using AS_HELP_STRING
Mon, 12 Jan 2015 13:45:36 EDT (klin)
-----------------------------------
* bb#11238 - added missing PDF preclass operations
> added whitespace fix for indirect references strings
> added PDF escape sequence handling (including octal)
Thu, 8 Jan 2015 09:48:20 EDT (klin)
-----------------------------------
* bb#11237 - fixed bug in building CUD file
Wed, 7 Jan 2015 04:46:15 EDT (smorgan)
-----------------------------------
* bb11233 - fix a strange bus error on Mac OS X PPC when using debug mode.
Mon, 22 Dec 2014 12:13:38 EDT (klin)
-----------------------------------
* bb#11226 - fixed gpt GUID debugging message
*** End of 0.98.6, Start of 0.98.7
Tue Dec 16 16:21:40 2014 EDT (swebb)
-------------------------------------

65
NEWS
View file

@ -1,36 +1,45 @@
0.98.6
0.98.7
------
ClamAV 0.98.6 is a bug fix release correcting the following:
ClamAV 0.98.7 is here! This release contains new scanning features
and bug fixes.
- library shared object revisions.
- installation issues on some Mac OS X and FreeBSD platforms.
- includes a patch from Sebastian Andrzej Siewior making
ClamAV pid files compatible with systemd.
- Fix a heap out of bounds condition with crafted Yoda's
crypter files. This issue was discovered by Felix Groebert
of the Google Security Team.
- Fix a heap out of bounds condition with crafted mew packer
files. This issue was discovered by Felix Groebert of the
Google Security Team.
- Fix a heap out of bounds condition with crafted upx packer
files. This issue was discovered by Kevin Szkudlapski of
Quarkslab.
- Fix a heap out of bounds condition with crafted upack packer
files. This issue was discovered by Sebastian Andrzej Siewior.
CVE-2014-9328.
- Compensate a crash due to incorrect compiler optimization when
handling crafted petite packer files. This issue was discovered
by Sebastian Andrzej Siewior.
Thanks to the following ClamAV community members for code submissions
and bug reporting included in ClamAV 0.98.6:
- Improvements to PDF processing: decryption, escape sequence
handling, and file property collection.
- Scanning/analysis of additional Microsoft Office 2003 XML format.
- Fix infinite loop condition on crafted y0da cryptor file. Identified
and patch suggested by Sebastian Andrzej Siewior. CVE-2015-2221.
- Fix crash on crafted petite packed file. Reported and patch
supplied by Sebastian Andrzej Siewior. CVE-2015-2222.
- Fix false negatives on files within iso9660 containers. This issue
was reported by Minzhuan Gong.
- Fix a couple crashes on crafted upack packed file. Identified and
patches supplied by Sebastian Andrzej Siewior.
- Fix a crash during algorithmic detection on crafted PE file.
Identified and patch supplied by Sebastian Andrzej Siewior.
- Fix an infinite loop condition on a crafted "xz" archive file.
This was reported by Dimitri Kirchner and Goulven Guiheux.
CVE-2015-2668.
- Fix compilation error after ./configure --disable-pthreads.
Reported and fix suggested by John E. Krokes.
- Apply upstream patch for possible heap overflow in Henry Spencer's
regex library. CVE-2015-2305.
- Fix crash in upx decoder with crafted file. Discovered and patch
supplied by Sebastian Andrzej Siewior. CVE-2015-2170.
- Fix segfault scanning certain HTML files. Reported with sample by
Kai Risku.
- Improve detections within xar/pkg files.
As always, we appreciate contributions of bug reports, code fixes,
and sample submission from the ClamAV community members:
Sebastian Andrzej Siewior
Felix Groebert
Kevin Szkudlapski
Mark Pizzolato
Daniel J. Luke
Minzhuan Gong
Dimitri Kirchner
Goulven Guiheux
John E. Krokes
Kai Risku
--
The ClamAV team (http://www.clamav.net/about.html#credits)

42
README
View file

@ -2,6 +2,48 @@ Note: This README/NEWS file refers to the source tarball. Some things described
here may not be available in binary packages.
--
0.98.7
------
ClamAV 0.98.7 is here! This release contains new scanning features
and bug fixes.
- Improvements to PDF processing: decryption, escape sequence
handling, and file property collection.
- Scanning/analysis of additional Microsoft Office 2003 XML format.
- Fix infinite loop condition on crafted y0da cryptor file. Identified
and patch suggested by Sebastian Andrzej Siewior. CVE-2015-2221.
- Fix crash on crafted petite packed file. Reported and patch
supplied by Sebastian Andrzej Siewior. CVE-2015-2222.
- Fix false negatives on files within iso9660 containers. This issue
was reported by Minzhuan Gong.
- Fix a couple crashes on crafted upack packed file. Identified and
patches supplied by Sebastian Andrzej Siewior.
- Fix a crash during algorithmic detection on crafted PE file.
Identified and patch supplied by Sebastian Andrzej Siewior.
- Fix an infinite loop condition on a crafted "xz" archive file.
This was reported by Dimitri Kirchner and Goulven Guiheux.
CVE-2015-2668.
- Fix compilation error after ./configure --disable-pthreads.
Reported and fix suggested by John E. Krokes.
- Apply upstream patch for possible heap overflow in Henry Spencer's
regex library. CVE-2015-2305.
- Fix crash in upx decoder with crafted file. Discovered and patch
supplied by Sebastian Andrzej Siewior. CVE-2015-2170.
- Fix segfault scanning certain HTML files. Reported with sample by
Kai Risku.
- Improve detections within xar/pkg files.
As always, we appreciate contributions of bug reports, code fixes,
and sample submission from the ClamAV community members:
Sebastian Andrzej Siewior
Minzhuan Gong
Dimitri Kirchner
Goulven Guiheux
John E. Krokes
Kai Risku
0.98.6
------

4
configure vendored
View file

@ -28291,7 +28291,7 @@ fi
if test "x$XML_LIBS" = "x"; then
$as_echo_n " dmg and xar : "
$as_echo_n " libxml2 : "
if test "x" = "xno"; then :
$as_echo "no (disabled)"
elif test "x" = "xyes"; then :
@ -28305,7 +28305,7 @@ fi
else
$as_echo_n " dmg and xar : "
$as_echo_n " libxml2 : "
if test "x" = "xno"; then :
$as_echo "yes, from $XML_HOME (disabled)"
elif test "x" = "xyes"; then :

View file

@ -241,9 +241,9 @@ else
CL_MSG_STATUS([pcre ],[$PCRE_HOME],[$have_pcre])
fi
if test "x$XML_LIBS" = "x"; then
CL_MSG_STATUS([dmg and xar ],[no],[])
CL_MSG_STATUS([libxml2 ],[no],[])
else
CL_MSG_STATUS([dmg and xar ],[yes, from $XML_HOME],[])
CL_MSG_STATUS([libxml2 ],[yes, from $XML_HOME],[])
fi
# Yep, downgrading the compiler avoids the bug too:

Binary file not shown.

View file

@ -379,9 +379,6 @@ Scan RFC1341 messages split over many emails. You will need to periodically clea
.br
Default: no
.TP
\fBMailMaxRecursion (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBMaxRecursion\fR.
.TP
\fBPhishingSignatures BOOL\fR
With this option enabled ClamAV will try to detect phishing attempts by using signatures.
.br
@ -488,24 +485,6 @@ This option causes memory or nested map scans to dump the content to disk.
If you turn on this option, more data is written to disk and is available when the leave-temps option is enabled at the cost of more disk writes.
.br
Default: no
.TP
\fBArchiveMaxFileSize (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBMaxFileSize\fR and \fBMaxScanSize\fR.
.TP
\fBArchiveMaxRecursion (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBMaxRecursion\fR.
.TP
\fBArchiveMaxFiles (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBMaxFiles\fR.
.TP
\fBArchiveMaxCompressionRatio (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.TP
\fBArchiveBlockMax (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.TP
\fBArchiveLimitMemoryUsage (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.br
Default: no
.TP
@ -637,33 +616,6 @@ WARNING: setting this limit too high or disabling it may severely impact perform
.br
Default: 25M
.TP
\fBClamukoScanOnAccess (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBScanOnAccess\fR.
.TP
\fBClamukoScannerCount (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.TP
\fBClamukoScanOnOpen (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.TP
\fBClamukoScanOnClose (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.TP
\fBClamukoScanOnExec (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted.
.TP
\fBClamukoIncludePath (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessIncludePath\fR.
.TP
\fBClamukoExcludePath (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessExcludePath\fR.
.TP
\fBClamukoExcludeUID (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessExcludeUID\fR.
.TP
\fBClamukoMaxFileSize (OBSOLETE)\fR
\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessMaxFileSize\fR.
.TP
\fBScanOnAccess BOOL\fR
This option enables on-access scanning (Linux only)
.br

View file

@ -1,26 +1,15 @@
VIRUSNAME_PREFIX("SUBMIT.contains")
VIRUSNAMES("EmbedPE")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* Target type is 0, all relevant files */
TARGET(0)
/* Declares to run bytecode only for preclassification (affecting only preclass files) */
PRECLASS_HOOK_DECLARE
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
SIGNATURES_END
/* Trigger condition: true exactly when signature sig1 matched. */
bool logical_trigger(void)
{
    bool sig1_matched = matches(Signatures.sig1);

    return sig1_matched;
}
/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
#define STR_MAXLEN 256

View file

@ -3,26 +3,15 @@ VIRUSNAMES("CL_TYPE_MSWORD", "CL_TYPE_MSPPT", "CL_TYPE_MSXL",
"CL_TYPE_OOXML_WORD", "CL_TYPE_OOXML_PPT", "CL_TYPE_OOXML_XL",
"CL_TYPE_MSEXE", "CL_TYPE_PDF", "CL_TYPE_MSOLE2", "CL_TYPE_UNKNOWN", "InActive")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* Target type is 0, all relevant files */
TARGET(0)
/* Declares to run bytecode only for preclassification (affecting only preclass files) */
PRECLASS_HOOK_DECLARE
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
SIGNATURES_END
/* Trigger condition: true exactly when signature sig1 matched. */
bool logical_trigger(void)
{
    bool sig1_matched = matches(Signatures.sig1);

    return sig1_matched;
}
/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
#define STR_MAXLEN 256

View file

@ -1,34 +1,51 @@
VIRUSNAME_PREFIX("SUBMIT.NotPDF")
VIRUSNAMES("InActive", "Submit")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* Target type is 0, all relevant files */
TARGET(0)
/* Declares to run bytecode only for preclassification (affecting only preclass files) */
PRECLASS_HOOK_DECLARE
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
DECLARE_SIGNATURE(sig2)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
/* search '"RootFileType": "CL_TYPE_PDF"' */
DEFINE_SIGNATURE(sig2, "22526f6f7446696c6554797065223a2022434c5f545950455f50444622")
SIGNATURES_END
/* Trigger condition: sig1 matched and sig2 did not.
 * sig2 is only consulted once sig1 is known to have matched. */
bool logical_trigger(void)
{
    if (!matches(Signatures.sig1)) {
        return 0;
    }
    return !matches(Signatures.sig2);
}
/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
#define STR_MAXLEN 256
/*
 * Alerts with "Submit" when the root "FileType" JSON property is a string
 * other than "CL_TYPE_PDF".
 *
 * Returns -1 when the JSON API is inactive or the "FileType" property cannot
 * be found, 0 otherwise (whether or not an alert was raised).
 */
int entrypoint ()
{
    int32_t type, obj, len;
    char str[STR_MAXLEN];

    /* check if json is available; nothing can be inspected otherwise */
    if (!json_is_active()) {
        return -1;
    }

    /* acquire the "FileType" property (8 is the length of the key) */
    obj = json_get_object("FileType", 8, 0);
    if (obj <= 0) {
        return -1;
    }

    /* only a string-typed property can be compared against a type name */
    type = json_get_type(obj);
    if (type == JSON_TYPE_STRING) {
        /* acquire string length, note +1 is for the NUL terminator */
        len = json_get_string_length(obj) + 1;
        /* prevent buffer overflow */
        if (len > STR_MAXLEN) {
            len = STR_MAXLEN;
        }
        /* acquire string data, note len includes the NUL terminator */
        if (json_get_string(str, len, obj)) {
            /* debug print str (with '\n' and prepended message) */
            debug_print_str(str, len);
            /* 12 == length of "CL_TYPE_PDF" plus its terminator; alert only
             * when the type differs from PDF in length or in content */
            if (len != 12 || memcmp(str, "CL_TYPE_PDF", 12) != 0) {
                foundVirus("Submit");
            }
        }
    }
    return 0;
}

Binary file not shown.

View file

@ -0,0 +1,84 @@
VIRUSNAME_PREFIX("SUBMIT.contains")
VIRUSNAMES("EmbedPE")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
SIGNATURES_END
/* Trigger condition: true exactly when signature sig1 matched. */
bool logical_trigger(void)
{
    bool sig1_matched = matches(Signatures.sig1);

    return sig1_matched;
}
#define STR_MAXLEN 256
/*
 * Walks the "ContainedObjects" JSON array and alerts with "EmbedPE" as soon
 * as one element has a "FileType" string equal to "CL_TYPE_MSEXE".
 *
 * Returns -1 when the JSON API is inactive or "ContainedObjects" is not an
 * array, 0 otherwise (whether or not an alert was raised).
 */
int entrypoint ()
{
    char type_name[STR_MAXLEN];
    int32_t contained, contained_kind, count;
    int32_t element, filetype, len;
    int idx;

    /* check if json is available */
    if (!json_is_active())
        return -1;

    /* acquire the "ContainedObjects" property (16 is the key length) */
    contained = json_get_object("ContainedObjects", 16, 0);
    contained_kind = json_get_type(contained);
    /* debug print of the JSON type code (no '\n' or prepended message) */
    debug_print_uint(contained_kind);
    if (contained_kind != JSON_TYPE_ARRAY)
        return -1;

    /* iterate over every element of the array */
    count = json_get_array_length(contained);
    for (idx = 0; idx < count; ++idx) {
        /* element at index idx; skip entries that cannot be fetched */
        element = json_get_array_idx(idx, contained);
        if (element <= 0)
            continue;

        /* "FileType" property of that element (8 is the key length) */
        filetype = json_get_object("FileType", 8, element);
        if (filetype <= 0)
            continue;

        /* only string-typed properties can name a file type */
        if (json_get_type(filetype) != JSON_TYPE_STRING)
            continue;

        /* +1 is for the NUL terminator; clamp to the buffer size */
        len = json_get_string_length(filetype) + 1;
        if (len > STR_MAXLEN)
            len = STR_MAXLEN;

        if (!json_get_string(type_name, len, filetype))
            continue;

        /* debug print str (with '\n' and prepended message) */
        debug_print_str(type_name, len);

        /* 14 == length of "CL_TYPE_MSEXE" plus its terminator */
        if (len == 14 && !memcmp(type_name, "CL_TYPE_MSEXE", 14)) {
            /* alert for submission and stop at the first hit */
            foundVirus("EmbedPE");
            return 0;
        }
    }
    return 0;
}

View file

@ -0,0 +1,104 @@
VIRUSNAME_PREFIX("SUBMIT.filetype")
VIRUSNAMES("CL_TYPE_MSWORD", "CL_TYPE_MSPPT", "CL_TYPE_MSXL",
"CL_TYPE_OOXML_WORD", "CL_TYPE_OOXML_PPT", "CL_TYPE_OOXML_XL",
"CL_TYPE_MSEXE", "CL_TYPE_PDF", "CL_TYPE_MSOLE2", "CL_TYPE_UNKNOWN", "InActive")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
SIGNATURES_END
/* Trigger condition: true exactly when signature sig1 matched. */
bool logical_trigger(void)
{
    bool sig1_matched = matches(Signatures.sig1);

    return sig1_matched;
}
#define STR_MAXLEN 256
/*
 * Reads the root "FileType" JSON property and raises the alert named after
 * it; unrecognised type names are reported as "CL_TYPE_UNKNOWN".
 *
 * Returns 1 when the property is missing or is not a string, 0 otherwise
 * (including when the string itself could not be retrieved).
 */
int entrypoint ()
{
    char name[STR_MAXLEN];
    int32_t prop, len;

    /* alert when json is not available; execution still continues below */
    if (!json_is_active()) {
        foundVirus("InActive");
    }

    /* acquire the filetype object (8 is the key length) */
    prop = json_get_object("FileType", 8, 0);
    if (prop <= 0) {
        debug_print_str("json object has no filetype!", 28);
        return 1;
    }

    if (json_get_type(prop) != JSON_TYPE_STRING) {
        debug_print_str("json object filetype property is not string!", 44);
        return 1;
    }

    /* +1 is for the NUL terminator; clamp to the buffer size */
    len = json_get_string_length(prop) + 1;
    if (len > STR_MAXLEN) {
        len = STR_MAXLEN;
    }

    /* acquire string data, note len includes the NUL terminator */
    if (!json_get_string(name, len, prop)) {
        return 0;
    }

    /* debug print str (with '\n' and prepended message) */
    debug_print_str(name, len);

    /* each length below is the type name's length plus its terminator,
     * so the comparison only succeeds on an exact, full-string match */
    if (len == 14 && !memcmp(name, "CL_TYPE_MSEXE", 14)) {
        foundVirus("CL_TYPE_MSEXE");
    } else if (len == 12 && !memcmp(name, "CL_TYPE_PDF", 12)) {
        foundVirus("CL_TYPE_PDF");
    } else if (len == 19 && !memcmp(name, "CL_TYPE_OOXML_WORD", 19)) {
        foundVirus("CL_TYPE_OOXML_WORD");
    } else if (len == 18 && !memcmp(name, "CL_TYPE_OOXML_PPT", 18)) {
        foundVirus("CL_TYPE_OOXML_PPT");
    } else if (len == 17 && !memcmp(name, "CL_TYPE_OOXML_XL", 17)) {
        foundVirus("CL_TYPE_OOXML_XL");
    } else if (len == 15 && !memcmp(name, "CL_TYPE_MSWORD", 15)) {
        foundVirus("CL_TYPE_MSWORD");
    } else if (len == 14 && !memcmp(name, "CL_TYPE_MSPPT", 14)) {
        foundVirus("CL_TYPE_MSPPT");
    } else if (len == 13 && !memcmp(name, "CL_TYPE_MSXL", 13)) {
        foundVirus("CL_TYPE_MSXL");
    } else if (len == 15 && !memcmp(name, "CL_TYPE_MSOLE2", 15)) {
        foundVirus("CL_TYPE_MSOLE2");
    } else {
        foundVirus("CL_TYPE_UNKNOWN");
    }

    return 0;
}

View file

@ -0,0 +1,34 @@
VIRUSNAME_PREFIX("SUBMIT.NotPDF")
VIRUSNAMES("InActive", "Submit")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
DECLARE_SIGNATURE(sig2)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
/* search '"RootFileType": "CL_TYPE_PDF"' */
DEFINE_SIGNATURE(sig2, "22526f6f7446696c6554797065223a2022434c5f545950455f50444622")
SIGNATURES_END
/* Trigger condition: sig1 matched and sig2 did not.
 * sig2 is only consulted once sig1 is known to have matched. */
bool logical_trigger(void)
{
    if (!matches(Signatures.sig1)) {
        return 0;
    }
    return !matches(Signatures.sig2);
}
#define STR_MAXLEN 256
/*
 * Unconditionally raises the "Submit" alert and returns 0; all filtering is
 * left to logical_trigger().
 */
int entrypoint ()
{
    foundVirus("Submit");

    return 0;
}

View file

@ -0,0 +1,134 @@
VIRUSNAME_PREFIX("SUBMIT.PE")
VIRUSNAMES("Root", "Embedded", "RootEmbedded")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
DECLARE_SIGNATURE(sig2)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
/* search '"FileType": "CL_TYPE_MSEXE"' */
DEFINE_SIGNATURE(sig2, "2246696c6554797065223a2022434c5f545950455f4d5345584522")
SIGNATURES_END
/* Trigger condition: both sig1 and sig2 matched.
 * sig2 is only consulted once sig1 is known to have matched. */
bool logical_trigger(void)
{
    if (!matches(Signatures.sig1)) {
        return 0;
    }
    return matches(Signatures.sig2) != 0;
}
#define STR_MAXLEN 256
/*
 * Classifies where a "CL_TYPE_MSEXE" file type appears in the JSON
 * properties and alerts accordingly:
 *   "Root"         - only the root "FileType" is CL_TYPE_MSEXE
 *   "Embedded"     - only an element of "ContainedObjects" is
 *   "RootEmbedded" - both
 *
 * Returns -1 when the JSON API is inactive, the root "FileType" is missing,
 * or "ContainedObjects" exists but is not an array; 0 otherwise.
 */
int entrypoint ()
{
    char buf[STR_MAXLEN];
    int32_t is_root_pe = 0, has_embedded_pe = 0;
    int32_t prop, kind, len, list, element, count, idx;

    /* check if json is available */
    if (!json_is_active())
        return -1;

    /* root "FileType" property (8 is the key length) */
    prop = json_get_object("FileType", 8, 0);
    if (prop <= 0)
        return -1;

    /* only a string-typed property can name a file type */
    kind = json_get_type(prop);
    if (kind == JSON_TYPE_STRING) {
        /* +1 is for the NUL terminator; clamp to the buffer size */
        len = json_get_string_length(prop) + 1;
        if (len > STR_MAXLEN)
            len = STR_MAXLEN;

        if (json_get_string(buf, len, prop)) {
            /* debug print str (with '\n' and prepended message) */
            debug_print_str(buf, len);
            /* 14 == length of "CL_TYPE_MSEXE" plus its terminator */
            if (len == 14 && !memcmp(buf, "CL_TYPE_MSEXE", 14))
                is_root_pe = 1;
        }
    }
    debug_print_uint(is_root_pe);

    /* "ContainedObjects" property (16 is the key length); without it only
     * the root result can be reported */
    list = json_get_object("ContainedObjects", 16, 0);
    if (list <= 0) {
        if (is_root_pe) {
            foundVirus("Root");
        }
        return 0;
    }

    kind = json_get_type(list);
    /* debug print of the JSON type code (no '\n' or prepended message) */
    debug_print_uint(kind);
    if (kind != JSON_TYPE_ARRAY)
        return -1;

    /* scan the array until the first element typed CL_TYPE_MSEXE */
    count = json_get_array_length(list);
    for (idx = 0; idx < count; ++idx) {
        element = json_get_array_idx(idx, list);
        if (element <= 0)
            continue;

        prop = json_get_object("FileType", 8, element);
        if (prop <= 0)
            continue;

        if (json_get_type(prop) != JSON_TYPE_STRING)
            continue;

        len = json_get_string_length(prop) + 1;
        if (len > STR_MAXLEN)
            len = STR_MAXLEN;

        if (!json_get_string(buf, len, prop))
            continue;

        debug_print_str(buf, len);

        if (len == 14 && !memcmp(buf, "CL_TYPE_MSEXE", 14)) {
            has_embedded_pe = 1;
            break;
        }
    }
    debug_print_uint(is_root_pe);
    debug_print_uint(has_embedded_pe);

    /* report the combination that was observed, if any */
    if (is_root_pe) {
        if (has_embedded_pe) {
            foundVirus("RootEmbedded");
        } else {
            foundVirus("Root");
        }
    } else if (has_embedded_pe) {
        foundVirus("Embedded");
    }
    return 0;
}

View file

@ -0,0 +1,28 @@
VIRUSNAME_PREFIX("SUBMIT")
VIRUSNAMES("Sandbox")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
SIGNATURES_END
/* Trigger condition: true exactly when signature sig1 matched. */
bool logical_trigger(void)
{
    bool sig1_matched = matches(Signatures.sig1);

    return sig1_matched;
}
/* Placeholder entry point: performs no checks, raises no alert, returns 0. */
int entrypoint ()
{
    const int status = 0;

    return status;
}

View file

@ -1,29 +1,15 @@
VIRUSNAME_PREFIX("SUBMIT.PE")
VIRUSNAMES("Root", "Embedded", "RootEmbedded")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* Target type is 0, all relevant files */
TARGET(0)
/* Declares to run bytecode only for preclassification (affecting only preclass files) */
PRECLASS_HOOK_DECLARE
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
DECLARE_SIGNATURE(sig2)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
/* search '"FileType": "CL_TYPE_MSEXE"' */
DEFINE_SIGNATURE(sig2, "2246696c6554797065223a2022434c5f545950455f4d5345584522")
SIGNATURES_END
/* Trigger condition: both sig1 and sig2 matched.
 * sig2 is only consulted once sig1 is known to have matched. */
bool logical_trigger(void)
{
    if (!matches(Signatures.sig1)) {
        return 0;
    }
    return matches(Signatures.sig2) != 0;
}
/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
#define STR_MAXLEN 256

View file

@ -1,26 +1,15 @@
VIRUSNAME_PREFIX("SUBMIT")
VIRUSNAMES("Sandbox")
/* Target type is 13, internal JSON properties */
TARGET(13)
/* Target type is 0, all relevant files */
TARGET(0)
/* Declares to run bytecode only for preclassification (affecting only preclass files) */
PRECLASS_HOOK_DECLARE
/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
SIGNATURES_DECL_BEGIN
DECLARE_SIGNATURE(sig1)
SIGNATURES_DECL_END
SIGNATURES_DEF_BEGIN
/* search @offset 0 : '{ "Magic": "CLAMJSON' */
/* this can be readjusted for specific filetypes */
DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
SIGNATURES_END
/* Trigger condition: true exactly when signature sig1 matched. */
bool logical_trigger(void)
{
    bool sig1_matched = matches(Signatures.sig1);

    return sig1_matched;
}
/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)
int entrypoint ()
{

View file

@ -462,7 +462,11 @@ libclamav_la_SOURCES = \
matcher-pcre.c \
matcher-pcre.h \
regex_pcre.c \
regex_pcre.h
regex_pcre.h \
msxml.c \
msxml.h \
msxml_parser.c \
msxml_parser.h
libclamav_la_SOURCES += bignum.h\
bignum_fast.h\

View file

@ -243,7 +243,8 @@ am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
libclamav_la-yara_hash.lo libclamav_la-yara_grammar.lo \
libclamav_la-yara_lexer.lo libclamav_la-yara_parser.lo \
libclamav_la-msdoc.lo libclamav_la-matcher-pcre.lo \
libclamav_la-regex_pcre.lo libclamav_la-fp_add.lo \
libclamav_la-regex_pcre.lo libclamav_la-msxml.lo \
libclamav_la-msxml_parser.lo libclamav_la-fp_add.lo \
libclamav_la-fp_add_d.lo libclamav_la-fp_addmod.lo \
libclamav_la-fp_cmp.lo libclamav_la-fp_cmp_d.lo \
libclamav_la-fp_cmp_mag.lo libclamav_la-fp_sub.lo \
@ -839,10 +840,10 @@ libclamav_la_SOURCES = matcher-ac.c matcher-ac.h matcher-bm.c \
yara_hash.c yara_hash.h yara_grammar.y yara_lexer.l \
yara_lexer.h yara_parser.c yara_parser.h yara_clam.h msdoc.c \
msdoc.h matcher-pcre.c matcher-pcre.h regex_pcre.c \
regex_pcre.h bignum.h bignum_fast.h \
tomsfastmath/addsub/fp_add.c tomsfastmath/addsub/fp_add_d.c \
tomsfastmath/addsub/fp_addmod.c tomsfastmath/addsub/fp_cmp.c \
tomsfastmath/addsub/fp_cmp_d.c \
regex_pcre.h msxml.c msxml.h msxml_parser.c msxml_parser.h \
bignum.h bignum_fast.h tomsfastmath/addsub/fp_add.c \
tomsfastmath/addsub/fp_add_d.c tomsfastmath/addsub/fp_addmod.c \
tomsfastmath/addsub/fp_cmp.c tomsfastmath/addsub/fp_cmp_d.c \
tomsfastmath/addsub/fp_cmp_mag.c tomsfastmath/addsub/fp_sub.c \
tomsfastmath/addsub/fp_sub_d.c tomsfastmath/addsub/fp_submod.c \
tomsfastmath/addsub/s_fp_add.c tomsfastmath/addsub/s_fp_sub.c \
@ -1196,6 +1197,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msdoc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msexpand.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-mspack.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msxml.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msxml_parser.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-nulsft.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-ole2_extract.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-ooxml.Plo@am__quote@
@ -2311,6 +2314,20 @@ libclamav_la-regex_pcre.lo: regex_pcre.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-regex_pcre.lo `test -f 'regex_pcre.c' || echo '$(srcdir)/'`regex_pcre.c
libclamav_la-msxml.lo: msxml.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-msxml.lo -MD -MP -MF $(DEPDIR)/libclamav_la-msxml.Tpo -c -o libclamav_la-msxml.lo `test -f 'msxml.c' || echo '$(srcdir)/'`msxml.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-msxml.Tpo $(DEPDIR)/libclamav_la-msxml.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='msxml.c' object='libclamav_la-msxml.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msxml.lo `test -f 'msxml.c' || echo '$(srcdir)/'`msxml.c
libclamav_la-msxml_parser.lo: msxml_parser.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-msxml_parser.lo -MD -MP -MF $(DEPDIR)/libclamav_la-msxml_parser.Tpo -c -o libclamav_la-msxml_parser.lo `test -f 'msxml_parser.c' || echo '$(srcdir)/'`msxml_parser.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-msxml_parser.Tpo $(DEPDIR)/libclamav_la-msxml_parser.Plo
@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='msxml_parser.c' object='libclamav_la-msxml_parser.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msxml_parser.lo `test -f 'msxml_parser.c' || echo '$(srcdir)/'`msxml_parser.c
libclamav_la-fp_add.lo: tomsfastmath/addsub/fp_add.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-fp_add.lo -MD -MP -MF $(DEPDIR)/libclamav_la-fp_add.Tpo -c -o libclamav_la-fp_add.lo `test -f 'tomsfastmath/addsub/fp_add.c' || echo '$(srcdir)/'`tomsfastmath/addsub/fp_add.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-fp_add.Tpo $(DEPDIR)/libclamav_la-fp_add.Plo

View file

@ -2970,7 +2970,13 @@ void cli_bytecode_describe(const struct cli_bc *bc)
puts("logical only");
break;
case BC_PE_UNPACKER:
puts("PE hook");
puts("PE unpacker hook");
break;
case BC_PE_ALL:
puts("all PE hook");
break;
case BC_PRECLASS:
puts("preclass hook");
break;
default:
printf("Unknown (type %u)", bc->kind);
@ -3007,6 +3013,12 @@ void cli_bytecode_describe(const struct cli_bc *bc)
else
puts("all PE files!");
break;
case BC_PRECLASS:
if (bc->lsig)
puts("PRECLASS files matching logical signature");
else
puts("all PRECLASS files!");
break;
default:
puts("N/A (unknown type)\n");
break;

View file

@ -61,6 +61,9 @@ enum BytecodeKind {
/** specifies a PE hook, executes at a predetermined point in PE parsing for PE files,
* both packed and unpacked files */
BC_PE_ALL,
/** specifies a PRECLASS hook, executes at the end of file property collection and
* operates on the original file targeted for property collection */
BC_PRECLASS,
_BC_LAST_HOOK
};
@ -97,12 +100,13 @@ enum FunctionalityLevels {
FUNC_LEVEL_097_6 = 67, /**< LibClamAV release 0.97.6 */
FUNC_LEVEL_097_7 = 68, /**< LibClamAV release 0.97.7 */
FUNC_LEVEL_097_8 = 69, /**< LibClamAV release 0.97.8 */
FUNC_LEVEL_098_1 = 76, /**< LibClamAV release 0.98.2 */ /*last syncing to clamav*/
FUNC_LEVEL_098_1 = 76, /**< LibClamAV release 0.98.1 */ /*last syncing to clamav*/
FUNC_LEVEL_098_2 = 77, /**< LibClamAV release 0.98.2 */
FUNC_LEVEL_098_3 = 77, /**< LibClamAV release 0.98.3 */
FUNC_LEVEL_098_4 = 77, /**< LibClamAV release 0.98.4 */
FUNC_LEVEL_098_5 = 79, /**< LibClamAV release 0.98.5: JSON reading API requires this minimum level */
FUNC_LEVEL_098_6 = 79, /**< LibClamAV release 0.98.6 */
FUNC_LEVEL_098_7 = 80, /**< LibClamAV release 0.98.7: BC_PRECLASS bytecodes require minimum level */
FUNC_LEVEL_100 = 100 /*future release candidate*/
};
@ -111,7 +115,7 @@ enum FunctionalityLevels {
* Phase of PDF parsing used for PDF Hooks
*/
enum pdf_phase {
PDF_PHASE_NONE, /* not a PDF */
PDF_PHASE_NONE, /**< not a PDF */
PDF_PHASE_PARSED, /**< after parsing a PDF, object flags can be set etc. */
PDF_PHASE_POSTDUMP, /**< after an obj was dumped and scanned */
PDF_PHASE_END, /**< after the pdf scan finished */
@ -1123,14 +1127,14 @@ int32_t get_file_reliability(void);
/* ----------------- END 0.96.4 APIs ---------------------------------- */
/* ----------------- BEGIN 0.98.4 APIs -------------------------------- */
/* ----------------- JSON Parsing APIs -------------------------------- */
/*
/**
\group_json
* @return 0 - json is disabled or option not specified
* @return 1 - json is active and properties are available
*/
int32_t json_is_active(void);
/*
/**
\group_json
* @return objid of json object with specified name
* @return 0 if json object of specified name cannot be found
@ -1142,7 +1146,7 @@ int32_t json_is_active(void);
*/
int32_t json_get_object(const int8_t* name, int32_t name_len, int32_t objid);
/*
/**
\group_json
* @return type (json_type) of json object specified
* @return -1 if type unknown or invalid id
@ -1150,7 +1154,7 @@ int32_t json_get_object(const int8_t* name, int32_t name_len, int32_t objid);
*/
int32_t json_get_type(int32_t objid);
/*
/**
\group_json
* @return number of elements in the json array of objid
* @return -1 if an error has occurred
@ -1159,7 +1163,7 @@ int32_t json_get_type(int32_t objid);
*/
int32_t json_get_array_length(int32_t objid);
/*
/**
\group_json
* @return objid of json object at idx of json array of objid
* @return 0 if invalid idx
@ -1170,7 +1174,7 @@ int32_t json_get_array_length(int32_t objid);
*/
int32_t json_get_array_idx(int32_t idx, int32_t objid);
/*
/**
\group_json
* @return length of json string of objid, not including terminating null-character
* @return -1 if an error has occurred
@ -1179,7 +1183,7 @@ int32_t json_get_array_idx(int32_t idx, int32_t objid);
*/
int32_t json_get_string_length(int32_t objid);
/*
/**
\group_json
* @return number of characters transferred (capped by str_len),
* including terminating null-character
@ -1192,20 +1196,21 @@ int32_t json_get_string_length(int32_t objid);
*/
int32_t json_get_string(int8_t* str, int32_t str_len, int32_t objid);
/*
/**
\group_json
* @return boolean value of queried objid; will force other types to boolean
* @param[in] objid - id value of json object to query
*/
int32_t json_get_boolean(int32_t objid);
/*
/**
\group_json
* @return integer value of queried objid; will force other types to integer
* @param[in] objid - id value of json object to query
*/
int32_t json_get_int(int32_t objid);
//int64_t json_get_int64(int32_t objid);
/* bytecode does not support double type */
//double json_get_double(int32_t objid);

View file

@ -402,7 +402,7 @@ extern void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback
/* File properties callback */
typedef int (*clcb_file_props)(const char *j_propstr, int rc, void *cbdata);
extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback, void * cbdata);
extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback);
/* Statistics/intelligence gathering callbacks */
extern void cl_engine_set_stats_set_cbdata(struct cl_engine *engine, void *cbdata);

View file

@ -115,7 +115,13 @@ char *cl_base64_encode(void *data, size_t len)
size_t elen;
b64 = BIO_new(BIO_f_base64());
if (!(b64))
return NULL;
bio = BIO_new(BIO_s_mem());
if (!(bio)) {
BIO_free(b64);
return NULL;
}
bio = BIO_push(b64, bio);
BIO_write(bio, data, len);

View file

@ -119,6 +119,8 @@ static const struct ftmap_s {
{ "CL_TYPE_OOXML_XL", CL_TYPE_OOXML_XL },
{ "CL_TYPE_INTERNAL", CL_TYPE_INTERNAL },
{ "CL_TYPE_XDP", CL_TYPE_XDP },
{ "CL_TYPE_XML_WORD", CL_TYPE_XML_WORD },
{ "CL_TYPE_XML_XL", CL_TYPE_XML_XL },
{ NULL, CL_TYPE_IGNORED }
};

View file

@ -108,6 +108,8 @@ typedef enum {
CL_TYPE_GPT,
CL_TYPE_APM,
CL_TYPE_XDP,
CL_TYPE_XML_WORD,
CL_TYPE_XML_XL,
CL_TYPE_IGNORED /* please don't add anything below */
} cli_file_t;

View file

@ -160,7 +160,8 @@ static const char *ftypes_int[] = {
"0:0:377f0683002de218:SQLite WAL:CL_TYPE_ANY:CL_TYPE_IGNORED",
"0:0:53514c69746520666f726d6174203300:SQLite database:CL_TYPE_ANY:CL_TYPE_IGNORED",
"0:0:d9d505f920a163d7:SQLite journal:CL_TYPE_ANY:CL_TYPE_IGNORED",
"0:0:435753:SWF (compressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
"0:0:5a5753:SWF (LZMA compressed):CL_TYPE_ANY:CL_TYPE_SWF:81",
"0:0:435753:SWF (zlib compressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
"0:0:465753:SWF (uncompressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
"0:0:4d53434600000000:MS CAB:CL_TYPE_ANY:CL_TYPE_MSCAB",
"1:*:4d53434600000000:CAB-SFX:CL_TYPE_ANY:CL_TYPE_CABSFX",
@ -182,6 +183,10 @@ static const char *ftypes_int[] = {
"1:0:4552{510}504D0000:Disk Image - Apple Partition Map:CL_TYPE_ANY:CL_TYPE_APM:77",
"0:0:7b20224d61676963223a2022434c414d4a534f4e763022:Internal properties:CL_TYPE_ANY:CL_TYPE_INTERNAL:78",
"1:*:3c7864703a786470:Adobe XDP - Embedded PDF:CL_TYPE_ANY:CL_TYPE_XDP:79",
"1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c776f7264446f63756d656e74:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
"1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c??3a776f7264446f63756d656e74:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
"1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
"1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c??3a576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
NULL
};

View file

@ -47,7 +47,7 @@
#include <ifaddrs.h>
#endif
#if defined(SIOCGIFHWADDR)
#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
#if defined(_AIX)
#include <sys/ndd_var.h>
#include <sys/kinfo.h>
@ -116,7 +116,7 @@ struct device *get_devices(void)
uint8_t *mac;
int sock;
#if defined(SIOCGIFHWADDR)
#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
struct ifreq ifr;
#else
struct sockaddr_dl *sdl;
@ -155,7 +155,7 @@ struct device *get_devices(void)
* Instead, Linux uses its own ioctl. This code only runs if we're not Linux,
* Windows, or FreeBSD.
*/
#if !defined(SIOCGIFHWADDR)
#if !defined(SIOCGIFHWADDR) || defined(__GNU__)
for (i=0; i < ndevices; i++) {
if (!(strcmp(devices[i].name, addr->ifa_name))) {
sdl = (struct sockaddr_dl *)(addr->ifa_addr);
@ -180,7 +180,7 @@ struct device *get_devices(void)
}
/* This is the Linux version of getting the MAC addresses */
#if defined(SIOCGIFHWADDR)
#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
for (i=0; i < ndevices; i++) {
if (!(devices[i].name))
continue;

View file

@ -118,6 +118,7 @@ static char *iso_string(iso9660_t *iso, const void *src, unsigned int len) {
static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
cli_ctx *ctx = iso->ctx;
int ret = CL_CLEAN;
int viruses_found = 0;
if(len < 34) {
cli_dbgmsg("iso_parse_dir: Directory too small, skipping\n");
@ -182,10 +183,13 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
filesz = cli_readint32(dir+10);
cli_dbgmsg("iso_parse_dir: %s '%s': off %x - size %x - flags %x - unit size %x - gap size %x - volume %u\n", (dir[25] & 2) ? "Directory" : "File", iso->buf, fileoff, filesz, dir[25], dir[26], dir[27], cli_readint32(&dir[28]) & 0xffff);
if(cli_matchmeta(ctx, iso->buf, filesz, filesz, 0, 0, 0, NULL) == CL_VIRUS) {
ret = CL_VIRUS;
break;
}
ret = cli_matchmeta(ctx, iso->buf, filesz, filesz, 0, 0, 0, NULL);
if (ret == CL_VIRUS) {
viruses_found = 1;
if (!SCAN_ALL)
break;
ret = CL_CLEAN;
}
if(dir[26] || dir[27])
cli_dbgmsg("iso_parse_dir: Skipping interleaved file\n");
@ -199,6 +203,12 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
else
ret = iso_scan_file(iso, fileoff, filesz);
}
if (ret == CL_VIRUS) {
viruses_found = 1;
if (!SCAN_ALL)
break;
ret = CL_CLEAN;
}
}
dirsz -= entrysz;
dir += entrysz;
@ -206,6 +216,8 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
fmap_unneed_ptr(*ctx->fmap, dir_orig, iso->blocksz);
}
if (viruses_found == 1)
return CL_VIRUS;
return ret;
}

View file

@ -46,6 +46,21 @@ int cli_json_timeout_cycle_check(cli_ctx *ctx, int *toval)
return CL_SUCCESS;
}
/*
 * Record a parse error string in the JSON properties.
 *
 * The string errstr is added (with a NULL key) to the array returned by
 * cli_jsonarray(root, "ParseErrors").
 *
 * Returns CL_SUCCESS when root is NULL (nothing to record), CL_EMEM when the
 * "ParseErrors" array is unavailable, otherwise the result of cli_jsonstr().
 */
int cli_json_parse_error(json_object *root, const char *errstr)
{
    json_object *errors = NULL;

    if (root != NULL) {
        errors = cli_jsonarray(root, "ParseErrors");
        if (!errors)
            return CL_EMEM;

        return cli_jsonstr(errors, NULL, errstr);
    }

    /* no JSON root to attach the error to; CL_ENULLARG? */
    return CL_SUCCESS;
}
int cli_jsonnull(json_object *obj, const char* key)
{
json_type objty;

View file

@ -37,6 +37,7 @@
#define JSON_TIMEOUT_SKIP_CYCLES 3
int cli_json_timeout_cycle_check(cli_ctx *ctx, int *toval);
int cli_json_parse_error(json_object *root, const char *errstr);
int cli_jsonnull(json_object *obj, const char* key);
int cli_jsonstr(json_object *obj, const char* key, const char* s);

View file

@ -245,7 +245,7 @@ void cli_bm_free(struct cli_matcher *root)
}
}
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset)
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, cli_ctx *ctx)
{
uint32_t i, j, off, off_min, off_max;
uint8_t found, pchain, shift;
@ -253,7 +253,7 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
struct cli_bm_patt *p;
const unsigned char *bp, *pt;
unsigned char prefix;
int ret;
int ret, viruses_found = 0;
if(!root || !root->bm_shift)
return CL_CLEAN;
@ -285,8 +285,11 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
if(offdata) {
off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos]) {
if (viruses_found)
return CL_VIRUS;
return CL_CLEAN;
}
i += offdata->offtab[offdata->pos] - off;
} else {
i++;
@ -377,12 +380,18 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
}
if(virname) {
*virname = p->virname;
if(viroffset)
*viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
if(ctx != NULL && SCAN_ALL) {
cli_append_virus(ctx, *virname);
//*viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
}
}
if(patt)
*patt = p;
return CL_VIRUS;
viruses_found = 1;
if(ctx != NULL && !SCAN_ALL)
return CL_VIRUS;
}
p = p->next;
}
@ -392,8 +401,11 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
if(offdata) {
off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos]) {
if (viruses_found)
return CL_VIRUS;
return CL_CLEAN;
}
i += offdata->offtab[offdata->pos] - off;
} else {
i += shift;
@ -401,5 +413,7 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
}
if (viruses_found)
return CL_VIRUS;
return CL_CLEAN;
}

View file

@ -25,6 +25,7 @@
#include "filetypes.h"
#include "cltypes.h"
#include "fmap.h"
#include "others.h"
#define BM_BOUNDARY_EOL 1
@ -47,7 +48,7 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const
int cli_bm_init(struct cli_matcher *root);
int cli_bm_initoff(const struct cli_matcher *root, struct cli_bm_off *data, const struct cli_target_info *info);
void cli_bm_freeoff(struct cli_bm_off *data);
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset);
int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, cli_ctx *ctx);
void cli_bm_free(struct cli_matcher *root);
#endif

View file

@ -104,7 +104,6 @@ static inline int matcher_run(const struct cli_matcher *root,
fmap_t *map,
struct cli_bm_off *offdata,
struct cli_pcre_off *poffdata,
uint32_t *viroffset,
cli_ctx *ctx)
{
int ret, tmp;
@ -143,17 +142,20 @@ static inline int matcher_run(const struct cli_matcher *root,
/* Don't use prefiltering for BM offset mode, since BM keeps tracks
* of offsets itself, and doesn't work if we skip chunks of input
* data */
ret = cli_bm_scanbuff(orig_buffer, orig_length, virname, NULL, root, orig_offset, tinfo, offdata, viroffset);
ret = cli_bm_scanbuff(orig_buffer, orig_length, virname, NULL, root, orig_offset, tinfo, offdata, ctx);
} else {
ret = cli_bm_scanbuff(buffer, length, virname, NULL, root, offset, tinfo, offdata, viroffset);
ret = cli_bm_scanbuff(buffer, length, virname, NULL, root, offset, tinfo, offdata, ctx);
}
if (ret == CL_VIRUS) {
if (ctx) {
if (ret != CL_CLEAN) {
if (ret != CL_VIRUS)
return ret;
/* else (ret == CL_VIRUS) */
if (SCAN_ALL)
viruses_found = 1;
else {
cli_append_virus(ctx, *virname);
if (SCAN_ALL)
viruses_found++;
else
return ret;
return ret;
}
}
}
@ -253,7 +255,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
return ret;
ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, ctx);
if(!acdata)
cli_ac_freedata(&mdata);
@ -273,7 +275,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
return ret;
ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, ctx);
if(!acdata)
cli_ac_freedata(&mdata);
@ -813,7 +815,6 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
fmap_t *map = *ctx->fmap;
struct cli_matcher *hdb, *fp;
const char *virname = NULL;
uint32_t viroffset = 0;
uint32_t viruses_found = 0;
void *md5ctx, *sha1ctx, *sha256ctx;
@ -993,8 +994,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
if(troot) {
virname = NULL;
viroffset = 0;
ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, &viroffset, ctx);
ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, ctx);
if (virname) {
/* virname already appended by matcher_run */
@ -1024,8 +1024,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
if(!ftonly) {
virname = NULL;
viroffset = 0;
ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, &viroffset, ctx);
ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, ctx);
if (virname) {
/* virname already appended by matcher_run */
@ -1054,7 +1053,9 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
type = ret;
}
if(hdb) {
/* if (bytes <= (maxpatlen * (offset!=0))), it means the last window finished the file hashing *
* since the last window is responsible for adding intersection between windows (maxpatlen) */
if(hdb && (bytes > (maxpatlen * (offset!=0)))) {
const void *data = buff + maxpatlen * (offset!=0);
uint32_t data_len = bytes - maxpatlen * (offset!=0);
@ -1067,11 +1068,6 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
}
}
if(SCAN_ALL && viroffset) {
offset = viroffset;
continue;
}
if(bytes < SCANBUFF)
break;

View file

@ -3,7 +3,7 @@
*
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Trog
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the

View file

@ -3,7 +3,7 @@
*
* Copyright (C) 2007-2008 Sourcefire, Inc.
*
* Authors: Trog
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as

285
libclamav/msxml.c Normal file
View file

@ -0,0 +1,285 @@
/*
* Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
*
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "clamav.h"
#include "others.h"
#include "conv.h"
#include "json_api.h"
#include "msxml.h"
#include "msxml_parser.h"
#if HAVE_LIBXML2
#ifdef _WIN32
#ifndef LIBXML_WRITER_ENABLED
#define LIBXML_WRITER_ENABLED 1
#endif
#endif
#include <libxml/xmlreader.h>
#define MSXML_VERBIOSE 0
#if MSXML_VERBIOSE
#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
#else
#define cli_msxmlmsg(...)
#endif
#define MSXML_READBUFF SCANBUFF
/*
 * Element table passed to cli_msxml_parse_document() by cli_scanmsxml().
 *
 * Each entry holds a lower-case element name, the same name in mixed case,
 * and a set of MSXML_* flags.
 * NOTE(review): struct key_entry and the MSXML_* flags are declared outside
 * this file (presumably msxml_parser.h) - confirm the exact meaning of each
 * field and flag there.
 */
static const struct key_entry msxml_keys[] = {
{ "worddocument", "WordDocument", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
{ "workbook", "Workbook", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
{ "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT },
{ "documentproperties", "DocumentProperties", MSXML_JSON_ROOT },
{ "author", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "lastauthor", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "lastsaved", "LastSaved", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "paragraph", "Paragraph", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "version", "Version", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "allowpng", "AllowPNG", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "fonts", "Fonts", MSXML_IGNORE_ELEM },
{ "styles", "Styles", MSXML_IGNORE_ELEM }
};
/* Number of entries in msxml_keys. */
static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry);
enum msxml_state {
MSXML_STATE_NORMAL = 0,
MSXML_STATE_ENTITY_START_1,
MSXML_STATE_ENTITY_START_2,
MSXML_STATE_ENTITY_HEX,
MSXML_STATE_ENTITY_DEC,
MSXML_STATE_ENTITY_CLOSE,
MSXML_STATE_ENTITY_NONE
};
/*
 * Context handed to msxml_read_cb() through xmlReaderForIO(); tracks the
 * current read window into the fmap and the entity-scanner state.
 */
struct msxml_cbdata {
/* entity-scanner state carried across callback invocations */
enum msxml_state state;
/* file map the document is read from */
fmap_t *map;
/* current window into map; NULL before the first read and after EOF */
const unsigned char *window;
/* winpos: read offset inside window; mappos: offset in map where window starts */
off_t winpos, mappos;
/* size of the current window in bytes */
size_t winsize;
};
/*
 * Move the callback data on to the next window of the fmap.
 *
 * The next window starts at mappos + winsize and covers at most
 * MSXML_READBUFF bytes. On success window/winpos/mappos/winsize are updated.
 *
 * Returns the size of the new window (> 0), 0 when the map is exhausted, or
 * -1 when the window cannot be acquired. The return type is int (not size_t)
 * so that the -1 error value reaches the caller, which stores the result in
 * an int, without relying on an unsigned-to-signed conversion.
 * NOTE(review): assumes MSXML_READBUFF (SCANBUFF) fits in an int - confirm.
 */
static inline int msxml_read_cb_new_window(struct msxml_cbdata *cbdata)
{
    const unsigned char *new_window = NULL;
    off_t new_mappos;
    size_t bytes;

    /* EOF was already reported by a previous call */
    if (cbdata->mappos == cbdata->map->len) {
        cli_msxmlmsg("msxml_read_cb: fmap REALLY EOF\n");
        return 0;
    }

    new_mappos = cbdata->mappos + cbdata->winsize;
    bytes = MIN(cbdata->map->len - new_mappos, MSXML_READBUFF);
    if (!bytes) {
        /* nothing left: mark EOF so later calls take the early return above */
        cbdata->window = NULL;
        cbdata->winpos = 0;
        cbdata->mappos = cbdata->map->len;
        cbdata->winsize = 0;
        cli_msxmlmsg("msxml_read_cb: fmap EOF\n");
        return 0;
    }

    new_window = fmap_need_off_once(cbdata->map, new_mappos, bytes);
    if (!new_window) {
        cli_errmsg("msxml_read_cb: cannot acquire new window for fmap\n");
        return -1;
    }

    cbdata->window = new_window;
    cbdata->winpos = 0;
    cbdata->mappos = new_mappos;
    cbdata->winsize = bytes;

    cli_msxmlmsg("msxml_read_cb: acquired new window @ [%llu(+%llu)(max:%llu)]\n",
                 (long long unsigned)cbdata->mappos, (long long unsigned)(cbdata->mappos + cbdata->winsize),
                 (long long unsigned)cbdata->map->len);

    return (int)bytes;
}
/*
 * Input read callback given to xmlReaderForIO(): copies up to len bytes of
 * the fmap into buffer, window by window, while repairing unterminated
 * numeric character entities.
 *
 * While copying, a small state machine follows "&#" + decimal digits and
 * "&#x" + hex digits. When the first byte after the digits is not ';', a ';'
 * is written to the output before that byte.
 *
 * ctx    - struct msxml_cbdata describing the map, window and scanner state
 * buffer - destination buffer
 * len    - capacity of buffer in bytes
 *
 * Returns the number of bytes written to buffer, 0 at end of the map, or a
 * negative value on error (window acquisition failure or negative len).
 */
int msxml_read_cb(void *ctx, char *buffer, int len)
{
    struct msxml_cbdata *cbdata = (struct msxml_cbdata *)ctx;
    size_t wbytes, rbytes;
    int winret;

    cli_msxmlmsg("msxml_read_cb called\n");

    /* a negative length would turn into a huge unsigned bound in the
     * comparisons against wbytes below and overrun buffer */
    if (len < 0)
        return -1;

    /* initial iteration */
    if (!cbdata->window) {
        if ((winret = msxml_read_cb_new_window(cbdata)) <= 0)
            return winret;
    }

    cli_msxmlmsg("msxml_read_cb: requested %d bytes from offset %llu\n", len, (long long unsigned)(cbdata->mappos+cbdata->winpos));

    wbytes = 0;
    rbytes = cbdata->winsize - cbdata->winpos;

    /* copying loop with preprocessing */
    while (wbytes < (size_t)len) {
        const unsigned char *read_from;
        char *write_to = buffer + wbytes;
        enum msxml_state *state;
#if MSXML_VERBIOSE
        size_t written;
#endif

        /* current window fully consumed: fetch the next one */
        if (!rbytes) {
            if ((winret = msxml_read_cb_new_window(cbdata)) < 0)
                return winret;
            if (winret == 0) {
                cli_msxmlmsg("msxml_read_cb: propagating fmap EOF [%llu]\n", (long long unsigned)wbytes);
                return (int)wbytes;
            }

            rbytes = cbdata->winsize;
        }

#if MSXML_VERBIOSE
        written = MIN(rbytes, len - wbytes);
        cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->~%llu\n",
                     (long long unsigned)(cbdata->winsize - rbytes), (long long unsigned)cbdata->winsize,
                     (long long unsigned)cbdata->winpos, (long long unsigned)(cbdata->winpos + written));
#endif

        read_from = cbdata->window + cbdata->winpos;
        state = &(cbdata->state);

        while (rbytes > 0 && wbytes < (size_t)len) {
            switch (*state) {
            case MSXML_STATE_NORMAL:
                if ((*read_from) == '&')
                    *state = MSXML_STATE_ENTITY_START_1;
                break;
            case MSXML_STATE_ENTITY_START_1:
                if ((*read_from) == '#')
                    *state = MSXML_STATE_ENTITY_START_2;
                else
                    *state = MSXML_STATE_NORMAL;
                break;
            case MSXML_STATE_ENTITY_START_2:
                if ((*read_from) == 'x')
                    *state = MSXML_STATE_ENTITY_HEX;
                else if (((*read_from) >= '0') && ((*read_from) <= '9'))
                    *state = MSXML_STATE_ENTITY_DEC;
                else
                    *state = MSXML_STATE_NORMAL;
                break;
            case MSXML_STATE_ENTITY_HEX:
                if ((((*read_from) >= '0') && ((*read_from) <= '9')) ||
                    (((*read_from) >= 'a') && ((*read_from) <= 'f')) ||
                    (((*read_from) >= 'A') && ((*read_from) <= 'F'))) {}
                else
                    *state = MSXML_STATE_ENTITY_CLOSE;
                break;
            case MSXML_STATE_ENTITY_DEC:
                if (((*read_from) >= '0') && ((*read_from) <= '9')) {}
                else
                    *state = MSXML_STATE_ENTITY_CLOSE;
                break;
            default:
                cli_errmsg("unknown *state: %d\n", *state);
            }

            if (*state == MSXML_STATE_ENTITY_CLOSE) {
                if ((*read_from) != ';') {
                    cli_msxmlmsg("msxml_read_cb: detected unterminated character entity @ winoff %d\n",
                                 (int)(read_from - cbdata->window));
                    /* in bounds: the loop condition guarantees wbytes < len here */
                    (*write_to++) = ';';
                    wbytes++;
                }
                *state = MSXML_STATE_NORMAL;
                /* the inserted ';' took the last free slot; the current input
                 * byte stays unread and is re-examined by the next call */
                if (wbytes >= (size_t)len)
                    break;
            }

            /* A '&' that terminated an entity or aborted a partial match is
             * itself the start of a new candidate entity. Without this the
             * outcome depended on whether the buffer happened to fill just
             * above (in which case the '&' is re-read in NORMAL state). */
            if (*state == MSXML_STATE_NORMAL && (*read_from) == '&')
                *state = MSXML_STATE_ENTITY_START_1;

            *(write_to++) = *(read_from++);
            rbytes--;
            wbytes++;
        }
    }

    /* remember how far into the current window we got */
    cbdata->winpos = cbdata->winsize - rbytes;
    return (int)wbytes;
}
#endif
/*
 * Scan an MS XML document (e.g. MS Office 2003 XML) held in ctx's fmap.
 *
 * With libxml2: creates an xmlTextReader fed by msxml_read_cb() and hands it
 * to cli_msxml_parse_document() together with the msxml_keys table.
 * Returns CL_ENULLARG when ctx is NULL. When the reader cannot be created,
 * the error is recorded via cli_json_parse_error() (HAVE_JSON builds) and
 * that result, or CL_SUCCESS, is returned. Otherwise returns the result of
 * cli_msxml_parse_document().
 *
 * Without libxml2: only logs a debug message and returns CL_SUCCESS.
 */
int cli_scanmsxml(cli_ctx *ctx)
{
#if HAVE_LIBXML2
    struct msxml_cbdata cbdata;
    xmlTextReaderPtr reader = NULL;
    int ret = CL_SUCCESS;

    cli_dbgmsg("in cli_scanmsxml()\n");

    if (!ctx)
        return CL_ENULLARG;

    /* zeroed state: no window yet, scanner in MSXML_STATE_NORMAL */
    memset(&cbdata, 0, sizeof(cbdata));
    cbdata.map = *ctx->fmap;

    reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "msxml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (!reader) {
        cli_dbgmsg("cli_scanmsxml: cannot intialize xmlReader\n");

#if HAVE_JSON
        ret = cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_IO");
#endif
        return ret; /* libxml2 failed! */
    }

    /* NOTE(review): meaning of the final argument (1) is defined by
     * cli_msxml_parse_document(), which is outside this file */
    ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1);

    xmlTextReaderClose(reader);
    xmlFreeTextReader(reader);

    return ret;
#else
    UNUSEDPARAM(ctx);
    cli_dbgmsg("in cli_scanmsxml()\n");
    cli_dbgmsg("cli_scanmsxml: scanning msxml documents requires libxml2!\n");

    return CL_SUCCESS;
#endif
}

34
libclamav/msxml.h Normal file
View file

@ -0,0 +1,34 @@
/*
* Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
*
* Copyright (C) 2007-2008 Sourcefire, Inc.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef __MSXML_H
#define __MSXML_H
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include "others.h"
/*
 * Scan the MS XML document referenced by ctx.
 * Returns a CL_* status code; CL_ENULLARG when ctx is NULL.
 */
int cli_scanmsxml(cli_ctx *ctx);
#endif /* __MSXML_H */

543
libclamav/msxml_parser.c Normal file
View file

@ -0,0 +1,543 @@
/*
* Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
*
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include "clamav.h"
#include "others.h"
#include "conv.h"
#include "scanners.h"
#include "json_api.h"
#include "msxml_parser.h"
#if HAVE_LIBXML2
#ifdef _WIN32
#ifndef LIBXML_WRITER_ENABLED
#define LIBXML_WRITER_ENABLED 1
#endif
#endif
#include <libxml/xmlreader.h>
/*
 * Verbose tracing switch for this parser: when MSXML_VERBIOSE is non-zero,
 * cli_msxmlmsg() forwards to cli_dbgmsg(); otherwise it expands to nothing.
 */
#define MSXML_VERBIOSE 0
#if MSXML_VERBIOSE
#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
#else
#define cli_msxmlmsg(...)
#endif
/*
 * Inspect the result of an xmlTextReader call and leave the ENCLOSING
 * function early when the reader cannot continue:
 *   -1 -> logs a warning and returns CL_EPARSE
 *    0 -> logs a debug message and returns CL_BREAK
 * Any other value falls through.  The argument is parenthesized so that an
 * expression can be passed safely; it may be evaluated twice.
 */
#define check_state(state)                                                    \
    do {                                                                      \
        if ((state) == -1) {                                                  \
            cli_warnmsg("check_state[msxml]: CL_EPARSE @ ln%d\n", __LINE__);  \
            return CL_EPARSE;                                                 \
        }                                                                     \
        else if ((state) == 0) {                                              \
            cli_dbgmsg("check_state[msxml]: CL_BREAK @ ln%d\n", __LINE__);    \
            return CL_BREAK;                                                  \
        }                                                                     \
    } while(0)
/* Fallback entry (no key, no name, type 0) returned by msxml_check_key() when an element is not in the table. */
struct key_entry blank_key = { NULL, NULL, 0 };
static const struct key_entry *msxml_check_key(struct msxml_ctx *mxctx, const xmlChar *key, size_t keylen)
{
unsigned i;
if (keylen > MSXML_JSON_STRLEN_MAX-1) {
cli_dbgmsg("msxml_check_key: key name too long\n");
return &blank_key;
}
for (i = 0; i < mxctx->num_keys; ++i) {
if (keylen == strlen(mxctx->keys[i].key) && !strncasecmp((char *)key, mxctx->keys[i].key, keylen)) {
return &mxctx->keys[i];
}
}
return &blank_key;
}
/*
 * xmlTextReader error callback: forwards libxml2 diagnostics to the ClamAV
 * log.  Warnings and errors go to cli_warnmsg(); any other severity goes to
 * cli_dbgmsg().
 *
 * arg is the user pointer registered with the handler and is not used.
 */
static void msxml_error_handler(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
{
    int line = xmlTextReaderLocatorLineNumber(locator);
    xmlChar *URI = xmlTextReaderLocatorBaseURI(locator);
    /* the base URI may be unavailable; never hand NULL to a %s conversion */
    const char *uri_str = URI ? (const char *)URI : "(unknown)";

    (void)arg;

    switch (severity) {
    case XML_PARSER_SEVERITY_WARNING:
    case XML_PARSER_SEVERITY_VALIDITY_WARNING:
        cli_warnmsg("%s:%d: parser warning : %s", uri_str, line, msg);
        break;
    case XML_PARSER_SEVERITY_ERROR:
    case XML_PARSER_SEVERITY_VALIDITY_ERROR:
        cli_warnmsg("%s:%d: parser error : %s", uri_str, line, msg);
        break;
    default:
        cli_dbgmsg("%s:%d: unknown severity : %s", uri_str, line, msg);
        break;
    }

    /* the URI was allocated by libxml2, so release it with libxml2's deallocator */
    if (URI)
        xmlFree(URI);
}
#if HAVE_JSON
/*
 * Decide whether the len-byte string at value is, in its entirety, a
 * base-10 integer that fits in an int32_t.
 *
 * Returns 1 and stores the parsed number in *val on success.  Returns 0
 * (leaving *val untouched) when the input is NULL or empty, contains no
 * digits, has trailing characters, or lies outside the int32_t range.
 *
 * The full 32-bit value is stored; it is not truncated to 16 bits, so
 * numbers above 65535 keep their real value.
 *
 * Where long is only 32 bits wide, strtol() clamps out-of-range input to
 * the limits of long, so such input is reported as the clamped value.
 */
static int msxml_is_int(const char *value, size_t len, int32_t *val)
{
    long parsed;
    char *endptr = NULL;

    if (value == NULL || val == NULL || len == 0)
        return 0;

    parsed = strtol(value, &endptr, 10);

    /* nothing converted, or the number does not span the whole string */
    if (endptr == value || endptr != value + len)
        return 0;

    /* refuse anything an int32_t cannot represent */
    if (parsed > 2147483647L || parsed < (-2147483647L - 1L))
        return 0;

    *val = (int32_t)parsed;

    return 1;
}
/*
 * Append node_value to the JSON array named arrname inside wrkptr.
 *
 * The text is stored as an integer when msxml_is_int() accepts it, as a
 * boolean when it is exactly "true" or "false", and as a string otherwise.
 *
 * Returns CL_ENULLARG when wrkptr is NULL, CL_EMEM when the array or the
 * new JSON value cannot be obtained, CL_SUCCESS otherwise.
 */
static int msxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
{
    json_object *newobj, *arrobj;
    int32_t val; /* must match the int32_t * expected by msxml_is_int() */

    if (!wrkptr)
        return CL_ENULLARG;

    arrobj = cli_jsonarray(wrkptr, arrname);
    if (arrobj == NULL) {
        return CL_EMEM;
    }

    if (msxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
        newobj = json_object_new_int(val);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
        newobj = json_object_new_boolean(1);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
        newobj = json_object_new_boolean(0);
    }
    else {
        newobj = json_object_new_string((const char *)node_value);
    }

    if (NULL == newobj) {
        cli_errmsg("msxml_parse_value: no memory for json value for [%s]\n", arrname);
        return CL_EMEM;
    }

    json_object_array_add(arrobj, newobj);
    return CL_SUCCESS;
}
#endif /* HAVE_JSON */
/*
 * Process the node the reader is currently positioned on and, for element
 * nodes, everything up to and including the matching end tag.
 *
 * mxctx  - parser state (scan context, key table, JSON settings)
 * reader - libxml2 text reader positioned on the node to process
 * rlvl   - current nesting depth; at MSXML_RECLEVEL_MAX the node is skipped
 * jptr   - JSON object of the enclosing element (used only in HAVE_JSON
 *          builds when JSON mode is enabled)
 *
 * For an element found in the key table the key's type flags decide what
 * happens: the element may be skipped entirely, mirrored into JSON (object,
 * counter, attributes, text values) and/or have its text content
 * base64-decoded into a temporary file that is scanned.
 *
 * Returns CL_SUCCESS or CL_VIRUS on normal completion; CL_BREAK / CL_EPARSE
 * when the reader reports end of input / an error (see check_state);
 * CL_EMEM, CL_EFORMAT, CL_ETIMEOUT, CL_EWRITE or the error of a failed
 * helper otherwise.
 */
static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, int rlvl, void *jptr)
{
    const xmlChar *element_name = NULL;
    const xmlChar *node_name = NULL, *node_value = NULL;
    const struct key_entry *keyinfo;
    int ret, virus = 0, state, node_type, endtag = 0;
    cli_ctx *ctx = mxctx->ctx;
#if HAVE_JSON
    json_object *parent = (json_object *)jptr;
    json_object *thisjobj = NULL;
#else
    void *thisjobj = NULL;
#endif

    cli_msxmlmsg("in msxml_parse_element @ layer %d\n", rlvl);

    /* check recursion level */
    if (rlvl >= MSXML_RECLEVEL_MAX) {
        cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n");

#if HAVE_JSON
        if (mxctx->mode) {
            int tmp = cli_json_parse_error(mxctx->root, "MSXML_RECURSIVE_LIMIT");
            if (tmp != CL_SUCCESS)
                return tmp;
        }
#endif

        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* acquire element type */
    node_type = xmlTextReaderNodeType(reader);
    if (node_type == -1)
        return CL_EPARSE;

    node_name = xmlTextReaderConstLocalName(reader);
    node_value = xmlTextReaderConstValue(reader);

    /* branch on node type */
    switch (node_type) {
    case XML_READER_TYPE_ELEMENT:
        cli_msxmlmsg("msxml_parse_element: ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);

        /* storing the element name for verification/collection */
        element_name = node_name;
        if (!element_name) {
            cli_dbgmsg("msxml_parse_element: element tag node nameless\n");
#if HAVE_JSON
            if (mxctx->mode) {
                int tmp = cli_json_parse_error(mxctx->root, "MSXML_NAMELESS_ELEMENT");
                if (tmp != CL_SUCCESS)
                    return tmp;
            }
#endif
            return CL_EPARSE; /* no name, nameless */
        }

        /* determine if the element is interesting */
        keyinfo = msxml_check_key(mxctx, element_name, xmlStrlen(element_name));

        cli_msxmlmsg("key:  %s\n", keyinfo->key);
        cli_msxmlmsg("name: %s\n", keyinfo->name);
        cli_msxmlmsg("type: 0x%x\n", keyinfo->type);

        /* element and contents are ignored */
        if (keyinfo->type & MSXML_IGNORE_ELEM) {
            cli_msxmlmsg("msxml_parse_element: IGNORING ELEMENT %s\n", keyinfo->name);

            state = xmlTextReaderNext(reader);
            check_state(state);
            return CL_SUCCESS;
        }

#if HAVE_JSON
        if (mxctx->mode && (keyinfo->type & MSXML_JSON_TRACK)) {
            /* the JSON object lives either under the document root or under the enclosing element */
            if (keyinfo->type & MSXML_JSON_ROOT)
                thisjobj = cli_jsonobj(mxctx->root, keyinfo->name);
            else if (keyinfo->type & MSXML_JSON_WRKPTR)
                thisjobj = cli_jsonobj(parent, keyinfo->name);

            if (!thisjobj) {
                return CL_EMEM;
            }
            cli_msxmlmsg("msxml_parse_element: generated json object [%s]\n", keyinfo->name);

            /* count this element */
            if (thisjobj && (keyinfo->type & MSXML_JSON_COUNT)) {
                json_object *counter = NULL;

                if (!json_object_object_get_ex(thisjobj, "Count", &counter)) { /* object not found */
                    cli_jsonint(thisjobj, "Count", 1);
                } else {
                    int value = json_object_get_int(counter);
                    cli_jsonint(thisjobj, "Count", value+1);
                }
                cli_msxmlmsg("msxml_parse_element: retrieved json object [Count]\n");
            }

            /* handle attributes */
            if (thisjobj && (keyinfo->type & MSXML_JSON_ATTRIB)) {
                state = xmlTextReaderHasAttributes(reader);
                if (state == 1) {
                    json_object *attributes;
                    const xmlChar *name, *value;

                    attributes = cli_jsonobj(thisjobj, "Attributes");
                    if (!attributes) {
                        /* NOTE(review): other cli_jsonobj() failures in this function return CL_EMEM - confirm CL_EPARSE is intended here */
                        return CL_EPARSE;
                    }
                    cli_msxmlmsg("msxml_parse_element: retrieved json object [Attributes]\n");

                    while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
                        name = xmlTextReaderConstLocalName(reader);
                        value = xmlTextReaderConstValue(reader);

                        cli_msxmlmsg("\t%s: %s\n", name, value);
                        cli_jsonstr(attributes, name, (const char *)value);
                    }
                }
                else if (state == -1)
                    return CL_EPARSE;
            }
        }
#endif

        /* check self-containment */
        /* the attribute walk above may have moved the reader; go back to the element first */
        state = xmlTextReaderMoveToElement(reader);
        if (state == -1)
            return CL_EPARSE;

        state = xmlTextReaderIsEmptyElement(reader);
        if (state == 1) {
            cli_msxmlmsg("msxml_parse_element: SELF-CLOSING\n");

            state = xmlTextReaderNext(reader);
            check_state(state);
            return CL_SUCCESS;
        } else if (state == -1)
            return CL_EPARSE;

        /* advance to first content node */
        state = xmlTextReaderRead(reader);
        check_state(state);

        while (!endtag) {
#if HAVE_JSON
            if (mxctx->mode && (cli_json_timeout_cycle_check(mxctx->ctx, &(mxctx->toval)) != CL_SUCCESS))
                return CL_ETIMEOUT;
#endif

            node_type = xmlTextReaderNodeType(reader);
            if (node_type == -1)
                return CL_EPARSE;

            switch (node_type) {
            case XML_READER_TYPE_ELEMENT:
                ret = msxml_parse_element(mxctx, reader, rlvl+1, thisjobj);
                /*
                 * NOTE(review): CL_VIRUS already satisfies "ret != CL_SUCCESS",
                 * so the SCAN_ALL branch below is never taken and a detection
                 * always returns immediately.  Left unchanged on purpose.
                 */
                if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) {
                    return ret;
                } else if (SCAN_ALL && ret == CL_VIRUS) {
                    virus = 1;
                }
                break;

            case XML_READER_TYPE_TEXT:
                node_value = xmlTextReaderConstValue(reader);

                cli_msxmlmsg("TEXT: %s\n", node_value);

#if HAVE_JSON
                if (thisjobj && (keyinfo->type & MSXML_JSON_VALUE)) {
                    ret = msxml_parse_value(thisjobj, "Value", node_value);
                    if (ret != CL_SUCCESS)
                        return ret;

                    cli_msxmlmsg("msxml_parse_element: added json value [%s: %s]\n", keyinfo->name, (const char *)node_value);
                }
#endif

                /* scanning protocol for embedded objects encoded in base64 */
                if (keyinfo->type & MSXML_SCAN_B64) {
                    /* cli_gentempfd() provides the heap-allocated file name on success */
                    char *decoded, *tempfile = NULL;
                    size_t decodedlen;
                    int of;

                    cli_msxmlmsg("BINARY DATA!\n");

                    decoded = (char *)cl_base64_decode((char *)node_value, strlen((const char *)node_value), NULL, &decodedlen, 0);
                    if (!decoded) {
                        cli_warnmsg("msxml_parse_element: failed to decode base64-encoded binary data\n");
                        state = xmlTextReaderRead(reader);
                        check_state(state);
                        break;
                    }

                    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &of)) != CL_SUCCESS) {
                        /* no usable file name exists on this path, so none is printed */
                        cli_warnmsg("msxml_parse_element: failed to create temporary file\n");
                        free(decoded);
                        return ret;
                    }

                    if (cli_writen(of, decoded, decodedlen) != (int)decodedlen) {
                        /* release everything acquired so far: buffer, descriptor, file and its name */
                        free(decoded);
                        close(of);
                        if (ctx && !(ctx->engine->keeptmp))
                            cli_unlink(tempfile);
                        free(tempfile);
                        return CL_EWRITE;
                    }
                    free(decoded);

                    cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile);

                    ret = cli_magic_scandesc(of, ctx);
                    close(of);
                    if (ctx && !(ctx->engine->keeptmp))
                        cli_unlink(tempfile);
                    free(tempfile);
                    /* NOTE(review): same unreachable SCAN_ALL branch as in the nested-element case */
                    if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) {
                        return ret;
                    } else if (SCAN_ALL && ret == CL_VIRUS) {
                        virus = 1;
                    }
                }

                /* advance to next node */
                state = xmlTextReaderRead(reader);
                check_state(state);
                break;

            case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
                /* advance to next node */
                state = xmlTextReaderRead(reader);
                check_state(state);
                break;

            case XML_READER_TYPE_END_ELEMENT:
                cli_msxmlmsg("in msxml_parse_element @ layer %d closed\n", rlvl);
                node_name = xmlTextReaderConstLocalName(reader);
                if (!node_name) {
                    cli_dbgmsg("msxml_parse_element: element end tag node nameless\n");
                    return CL_EPARSE; /* no name, nameless */
                }

                if (xmlStrcmp(element_name, node_name)) {
                    cli_dbgmsg("msxml_parse_element: element tag does not match end tag %s != %s\n", element_name, node_name);
                    return CL_EFORMAT;
                }

                /* advance to next element tag */
                state = xmlTextReaderRead(reader);
                check_state(state);

                endtag = 1;
                break;

            default:
                node_name = xmlTextReaderConstLocalName(reader);
                node_value = xmlTextReaderConstValue(reader);

                cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value);

                state = xmlTextReaderNext(reader);
                check_state(state);
                return (virus ? CL_VIRUS : CL_SUCCESS);
            }
        }

        break;
    case XML_READER_TYPE_PROCESSING_INSTRUCTION:
        cli_msxmlmsg("msxml_parse_element: PROCESSING INSTRUCTION %s [%d]: %s\n", node_name, node_type, node_value);
        break;
    case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
        cli_msxmlmsg("msxml_parse_element: SIGNIFICANT WHITESPACE %s [%d]: %s\n", node_name, node_type, node_value);
        break;
    case XML_READER_TYPE_END_ELEMENT:
        cli_msxmlmsg("msxml_parse_element: END ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);
        return (virus ? CL_VIRUS : CL_SUCCESS);
    default:
        cli_dbgmsg("msxml_parse_element: unhandled xml primary node %s [%d]: %s\n", node_name, node_type, node_value);
    }

    return (virus ? CL_VIRUS : CL_SUCCESS);
}
/*
 * Walk a whole XML document with the given reader and key table.
 *
 * Reader initialization and closing are handled by the caller.
 *
 * ctx      - scan context
 * reader   - libxml2 text reader positioned before the first node
 * keys     - table describing which elements matter and how to treat them
 * num_keys - number of entries in keys
 * mode     - non-zero requests JSON collection (HAVE_JSON builds only); it
 *            is switched off when ctx->wrkproperty is NULL
 *
 * Returns CL_VIRUS when a detection was recorded, CL_SUCCESS when parsing
 * ended normally or stopped on a suppressed condition (CL_EPARSE,
 * CL_ETIMEOUT, CL_BREAK), or another CL_* error.  A timeout detected by the
 * loop itself is returned directly as CL_ETIMEOUT.
 */
int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode)
{
    struct msxml_ctx mxctx;
    int state, virus = 0, ret = CL_SUCCESS;

    mxctx.ctx = ctx;
    mxctx.keys = keys;
    mxctx.num_keys = num_keys;
#if HAVE_JSON
    /* give every JSON field a defined value; root is passed to the element parser even when mode is 0 */
    mxctx.root = NULL;
    mxctx.toval = 0;
    mxctx.mode = mode;
    if (mode) {
        mxctx.root = ctx->wrkproperty;
        /* JSON Sanity Check */
        if (!mxctx.root)
            mxctx.mode = 0;
    }
#endif

    /* Error Handler: NULL selects the xml default handler (msxml_error_handler is not installed) */
    xmlTextReaderSetErrorHandler(reader, NULL, NULL);

    /* Main Processing Loop */
    while ((state = xmlTextReaderRead(reader)) == 1) {
#if HAVE_JSON
        if (mxctx.mode && (cli_json_timeout_cycle_check(mxctx.ctx, &(mxctx.toval)) != CL_SUCCESS))
            return CL_ETIMEOUT;

        ret = msxml_parse_element(&mxctx, reader, 0, mxctx.root);
#else
        ret = msxml_parse_element(&mxctx, reader, 0, NULL);
#endif
        if (ret == CL_SUCCESS)
            continue;

        if (SCAN_ALL && ret == CL_VIRUS) {
            /* non-allmatch simply propagates it down to return through ret */
            virus = 1;
            continue;
        }

        if (ret == CL_VIRUS || ret == CL_ETIMEOUT || ret == CL_BREAK)
            cli_dbgmsg("cli_msxml_parse_document: encountered halt event in parsing xml document\n");
        else
            cli_warnmsg("cli_msxml_parse_document: encountered issue in parsing xml document\n");
        break;
    }

    /* the reader itself reported an error */
    if (state == -1)
        ret = CL_EPARSE;

#if HAVE_JSON
    /* Parse General Error Handler */
    if (mxctx.mode) {
        int tmp = CL_SUCCESS;

        switch(ret) {
        case CL_SUCCESS:
        case CL_BREAK: /* OK */
            break;
        case CL_VIRUS:
            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_VIRUS");
            break;
        case CL_ETIMEOUT:
            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_TIMEOUT");
            break;
        case CL_EPARSE:
            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_XMLPARSER");
            break;
        case CL_EMEM:
            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OUTOFMEM");
            break;
        case CL_EFORMAT:
            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_MALFORMED");
            break;
        default:
            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OTHER");
            break;
        }

        if (tmp)
            return tmp;
    }
#endif

    /* non-critical return suppression */
    if (ret == CL_ETIMEOUT || ret == CL_BREAK)
        ret = CL_SUCCESS;

    /* important but non-critical suppression */
    if (ret == CL_EPARSE) {
        cli_dbgmsg("cli_msxml_parse_document: suppressing parsing error to continue scan\n");
        ret = CL_SUCCESS;
    }

    return (virus ? CL_VIRUS : ret);
}
#endif /* HAVE_LIBXML2 */

80
libclamav/msxml_parser.h Normal file
View file

@ -0,0 +1,80 @@
/*
* Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
*
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Kevin Lin
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License version 2 as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __MSXML_PARSER_H
#define __MSXML_PARSER_H
#if HAVE_LIBXML2
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include "others.h"
#include "json_api.h"
#ifdef _WIN32
#ifndef LIBXML_WRITER_ENABLED
#define LIBXML_WRITER_ENABLED 1
#endif
#endif
#include <libxml/xmlreader.h>
/* Nesting depth at which the element parser stops descending and skips the node instead. */
#define MSXML_RECLEVEL_MAX 20
/* Element names of this many characters or more are not looked up in the key table. */
#define MSXML_JSON_STRLEN_MAX 128
/*
 * One row of an element lookup table: maps an XML element name to the name
 * used for it in JSON output and to flags that tell the parser how to treat
 * the element.
 */
struct key_entry {
/* how */
/* MSXML_IGNORE_ELEM: skip the element and its contents.
 * MSXML_SCAN_B64: base64-decode the element's text content and scan it. */
#define MSXML_IGNORE 0x00
#define MSXML_IGNORE_ELEM 0x01
#define MSXML_SCAN_B64 0x02
/* where */
/* MSXML_JSON_ROOT: create the JSON object under the document root.
 * MSXML_JSON_WRKPTR: create it under the enclosing element's object.
 * MSXML_JSON_TRACK: either of the two, i.e. the element is mirrored into JSON. */
#define MSXML_JSON_ROOT 0x04
#define MSXML_JSON_WRKPTR 0x08
#define MSXML_JSON_TRACK (MSXML_JSON_ROOT | MSXML_JSON_WRKPTR)
/* what */
/* MSXML_JSON_COUNT: maintain a "Count" member on the object.
 * MSXML_JSON_VALUE: append text content to its "Value" array.
 * MSXML_JSON_ATTRIB: record XML attributes under "Attributes". */
#define MSXML_JSON_COUNT 0x10
#define MSXML_JSON_VALUE 0x20
#define MSXML_JSON_ATTRIB 0x40
/* element name to match (compared case-insensitively against the local name) */
const char *key;
/* name of the JSON object created for the element */
const char *name;
/* bitwise OR of the MSXML_* flags above */
int type;
};
/* Parser state shared by the document and element parsing routines. */
struct msxml_ctx {
/* scan context of the document being parsed */
cli_ctx *ctx;
/* element lookup table and its number of entries */
const struct key_entry *keys;
size_t num_keys;
#if HAVE_JSON
/* JSON object that document-level properties and parse errors are attached to */
json_object *root;
/* mode: non-zero while JSON collection is active; toval: counter handed to cli_json_timeout_cycle_check() */
int mode, toval;
#endif
};
/*
 * Parse the document behind reader using the keys table (num_keys entries).
 * A non-zero mode requests JSON collection in HAVE_JSON builds.  The caller
 * creates and frees the reader.  Returns a CL_* status code.
 */
int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode);
#endif /* HAVE_LIBXML2 */
#endif /* __MSXML_PARSER_H */

View file

@ -32,6 +32,7 @@
#include <stdlib.h>
#include <errno.h>
#include <conv.h>
#include <zlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
@ -955,6 +956,168 @@ handler_enum(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx *
return CL_SUCCESS;
}
/*
 * Heuristic test for an MSO stream: the file must hold at least 6 bytes and
 * bytes 4 and 5 must be 0x78 0x9C (a common zlib stream header).
 *
 * Returns 1 when the signature is present, 0 otherwise.  Seek and read
 * failures are logged and also reported as 0.  The descriptor's file
 * offset is changed by this call.
 */
static int
likely_mso_stream(int fd)
{
    unsigned char magic[2];
    off_t total = lseek(fd, 0, SEEK_END);

    if (total == -1) {
        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
        return 0;
    }

    /* too short to hold the 4-byte prefix plus the 2-byte header */
    if (total < 6)
        return 0;

    if (lseek(fd, 4, SEEK_SET) == -1) {
        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
        return 0;
    }

    if (cli_readn(fd, magic, 2) != 2) {
        cli_dbgmsg("likely_mso_stream: reading from fd failed\n");
        return 0;
    }

    return (magic[0] == 0x78 && magic[1] == 0x9C) ? 1 : 0;
}
/*
 * Inflate an MSO stream and scan the result.
 *
 * The input consists of a 32-bit prefix followed by zlib-compressed data.
 * The data is inflated into a temporary file, the amount written is
 * compared with the prefix (a mismatch is only logged), and the temporary
 * file is passed to cli_magic_scandesc().
 *
 * fd  - descriptor of the stream to inflate
 * ctx - scan context (engine settings, limits)
 *
 * Returns the scan result on success.  Errors: CL_ENULLARG (bad argument),
 * CL_ESTAT, CL_EMAP, CL_EREAD, CL_EWRITE, CL_EUNPACK, CL_EUNLINK, or the
 * cli_gentempfd() failure code.  A CL_VIRUS verdict is never replaced by a
 * cleanup error.
 */
static int
scan_mso_stream(int fd, cli_ctx *ctx)
{
    int zret, ofd, nread, ret = CL_SUCCESS;
    fmap_t *input;
    off_t off_in = 0;
    size_t count, outsize = 0;
    z_stream zstrm;
    char *tmpname;
    uint32_t prefix;
    unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];

    if (!ctx) {
        cli_dbgmsg("scan_mso_stream: Invalid context argument\n");
        return CL_ENULLARG;
    }

    /* fmap the input file for easier manipulation */
    if (fd < 0) {
        cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n");
        return CL_ENULLARG;
    } else {
        STATBUF statbuf;

        if (FSTAT(fd, &statbuf) == -1) {
            cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n");
            return CL_ESTAT;
        }

        input = fmap(fd, 0, statbuf.st_size);
        if (!input) {
            cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n");
            return CL_EMAP;
        }
    }

    /* reserve tempfile for output and scanning */
    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
        cli_errmsg("scan_mso_stream: Can't generate temporary file\n");
        funmap(input);
        return ret;
    }

    /* initialize zlib inflation stream */
    memset(&zstrm, 0, sizeof(zstrm));
    zstrm.zalloc = Z_NULL;
    zstrm.zfree = Z_NULL;
    zstrm.opaque = Z_NULL;
    zstrm.next_in = inbuf;
    zstrm.next_out = outbuf;
    zstrm.avail_in = 0;
    zstrm.avail_out = FILEBUFF;

    zret = inflateInit(&zstrm);
    if (zret != Z_OK) {
        cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n");
        ret = CL_EUNPACK;
        goto mso_end;
    }

    /* extract 32-bit prefix */
    /* NOTE(review): the prefix is used in host byte order - confirm whether a byte swap is needed on big-endian hosts */
    if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) {
        cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n");
        ret = CL_EREAD;
        goto mso_end;
    }

    off_in += sizeof(uint32_t);
    cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%u)\n", (unsigned int)prefix, (unsigned int)prefix);

    /* inflation loop */
    do {
        /* refill the input buffer once zlib has consumed it */
        if (zstrm.avail_in == 0) {
            zstrm.next_in = inbuf;
            nread = fmap_readn(input, inbuf, off_in, FILEBUFF);
            if (nread < 0) {
                cli_errmsg("scan_mso_stream: Error reading MSO file\n");
                ret = CL_EUNPACK;
                goto mso_end;
            }
            if (!nread)
                break;

            zstrm.avail_in = nread;
            off_in += nread;
        }
        zret = inflate(&zstrm, Z_SYNC_FLUSH);
        count = FILEBUFF - zstrm.avail_out;
        if (count) {
            /* stop inflating, but still scan what we have, once a scan limit is hit */
            if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS)
                break;
            if (cli_writen(ofd, outbuf, count) != count) {
                cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname);
                ret = CL_EWRITE;
                goto mso_end;
            }
            outsize += count;
        }
        zstrm.next_out = outbuf;
        zstrm.avail_out = FILEBUFF;
    } while(zret == Z_OK);

    /* post inflation checks */
    if (zret != Z_STREAM_END && zret != Z_OK) {
        if (outsize == 0) {
            cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n");
            ret = CL_EUNPACK;
            goto mso_end;
        }

        cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n");
    }
    cli_dbgmsg("scan_mso_stream: Decompressed to %s, size %llu\n", tmpname, (long long unsigned)outsize);

    if (outsize != prefix) {
        cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n",
                    (long long unsigned)prefix, (long long unsigned)outsize);
    } else {
        cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n",
                   (long long unsigned)prefix, (long long unsigned)outsize);
    }

    /* scanning inflated stream */
    ret = cli_magic_scandesc(ofd, ctx);

    /* clean-up */
mso_end:
    /* cleanup failures are reported, but must not hide a detection */
    zret = inflateEnd(&zstrm);
    if (zret != Z_OK && ret != CL_VIRUS)
        ret = CL_EUNPACK;
    close(ofd);
    if (!ctx->engine->keeptmp)
        if (cli_unlink(tmpname) && ret != CL_VIRUS)
            ret = CL_EUNLINK;
    free(tmpname);
    funmap(input);
    return ret;
}
static int
handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * ctx)
@ -962,7 +1125,7 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
char *tempfile;
unsigned char *buff;
int32_t current_block, len, offset;
int ofd, ret;
int ofd, is_mso, ret;
bitset_t *blk_bitset;
UNUSEDPARAM(dir);
@ -1061,6 +1224,7 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
}
}
is_mso = likely_mso_stream(ofd);
if (lseek(ofd, 0, SEEK_SET) == -1) {
close(ofd);
if (ctx && !(ctx->engine->keeptmp))
@ -1112,8 +1276,18 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
}
#endif
/* Normal File Scan */
ret = cli_magic_scandesc(ofd, ctx);
if (is_mso < 0) {
ret = CL_ESEEK;
} else if (is_mso) {
/* MSO Stream Scan */
ret = scan_mso_stream(ofd, ctx);
/* CONSIDER: running cli_magic_scandesc in the chance of MSO fp? */
//if (ret != CL_SUCCESS || ret != CL_VIRUS)
//ret = cli_magic_scandesc(ofd, ctx);
} else {
/* Normal File Scan */
ret = cli_magic_scandesc(ofd, ctx);
}
close(ofd);
free(buff);
cli_bitset_free(blk_bitset);

View file

@ -31,7 +31,7 @@
#include "json.h"
#endif
#include "json_api.h"
#include "msxml_parser.h"
#include "ooxml.h"
#if HAVE_LIBXML2
@ -43,366 +43,59 @@
#include <libxml/xmlreader.h>
#endif
#define OOXML_DEBUG 0
#if HAVE_LIBXML2 && HAVE_JSON
#define OOXML_JSON_RECLEVEL 16
#define OOXML_JSON_RECLEVEL_MAX 5
#define OOXML_JSON_STRLEN_MAX 100
static const struct key_entry ooxml_keys[] = {
{ "coreproperties", "CoreProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
{ "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "subject", "Subject", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "creator", "Author", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "keywords", "Keywords", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "comments", "Comments", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "description", "Description", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "lastmodifiedby", "LastAuthor", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "revision", "Revision", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "created", "Created", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "modified", "Modified", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "category", "Category", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "contentstatus", "ContentStatus", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
/*
 * Inspect the result of an xmlTextReader call and leave the ENCLOSING
 * function early when the reader cannot continue:
 *   -1 -> logs a warning and returns CL_EPARSE
 *    0 -> logs a debug message and returns CL_BREAK
 * The argument is parenthesized so that an expression can be passed
 * safely; it may be evaluated twice.
 */
#define check_state(state)                                              \
    do {                                                                \
        if ((state) == -1) {                                            \
            cli_warnmsg("check_state: CL_EPARSE @ ln%d\n", __LINE__);   \
            return CL_EPARSE;                                           \
        }                                                               \
        else if ((state) == 0) {                                        \
            cli_dbgmsg("check_state: CL_BREAK @ ln%d\n", __LINE__);     \
            return CL_BREAK;                                            \
        }                                                               \
    } while(0)
{ "properties", "ExtendedProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
{ "application", "Application", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "appversion", "AppVersion", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "characters", "Characters", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "company", "Company", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "digsig", "DigSig", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "docsecurity", "DocSecurity", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
//{ "headingpairs", "HeadingPairs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "hiddenslides", "HiddenSlides", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "hlinks", "HLinks", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "hyperlinkbase", "HyperlinkBase", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "hyperlinkschanged", "HyperlinksChanged", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "lines", "Lines", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "linksuptodate", "LinksUpToDate", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "manager", "Manager", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "mmclips", "MultimediaClips", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "notes", "Notes", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "pages", "Pages", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "paragraphs", "Paragraphs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "presentationformat", "PresentationFormat", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
//{ "properties", "Properties", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "scalecrop", "ScaleCrop", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "shareddoc", "SharedDocs", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "slides", "Slides", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "template", "Template", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
//{ "titleofparts", "TitleOfParts", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "totaltime", "TotalTime", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
{ "words", "Words", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
/*
 * Decide whether the len-byte string at value is, in its entirety, a
 * base-10 integer that fits in an int32_t.
 *
 * Returns 1 and stores the parsed number in *val on success.  Returns 0
 * (leaving *val untouched) when the input is NULL or empty, contains no
 * digits, has trailing characters, or lies outside the int32_t range.
 *
 * The full 32-bit value is stored; it is not truncated to 16 bits, so
 * numbers above 65535 keep their real value.
 *
 * Where long is only 32 bits wide, strtol() clamps out-of-range input to
 * the limits of long, so such input is reported as the clamped value.
 */
static int ooxml_is_int(const char *value, size_t len, int32_t *val)
{
    long parsed;
    char *endptr = NULL;

    if (value == NULL || val == NULL || len == 0)
        return 0;

    parsed = strtol(value, &endptr, 10);

    /* nothing converted, or the number does not span the whole string */
    if (endptr == value || endptr != value + len)
        return 0;

    /* refuse anything an int32_t cannot represent */
    if (parsed > 2147483647L || parsed < (-2147483647L - 1L))
        return 0;

    *val = (int32_t)parsed;

    return 1;
}
/*
 * Append errstr to the "ParseErrors" array of wrkptr.
 *
 * Returns CL_ENULLARG when wrkptr is NULL, CL_EMEM when the array cannot
 * be obtained, otherwise the result of cli_jsonstr().
 */
static int ooxml_add_parse_error(json_object *wrkptr, const xmlChar *errstr)
{
    json_object *errors;

    if (wrkptr == NULL)
        return CL_ENULLARG;

    errors = cli_jsonarray(wrkptr, "ParseErrors");
    if (!errors)
        return CL_EMEM;

    return cli_jsonstr(errors, NULL, errstr);
}
/*
 * Append node_value to the JSON array named arrname inside wrkptr.
 *
 * The text is stored as an integer when ooxml_is_int() accepts it, as a
 * boolean when it is exactly "true" or "false", and as a string otherwise.
 *
 * Returns CL_ENULLARG when wrkptr is NULL, CL_EMEM when the array or the
 * new JSON value cannot be obtained, CL_SUCCESS otherwise.
 */
static int ooxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
{
    json_object *newobj, *arrobj;
    int32_t val; /* must match the int32_t * expected by ooxml_is_int() */

    if (!wrkptr)
        return CL_ENULLARG;

    arrobj = cli_jsonarray(wrkptr, arrname);
    if (arrobj == NULL) {
        return CL_EMEM;
    }

    if (ooxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
        newobj = json_object_new_int(val);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
        newobj = json_object_new_boolean(1);
    }
    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
        newobj = json_object_new_boolean(0);
    }
    else {
        newobj = json_object_new_string((const char *)node_value);
    }

    if (NULL == newobj) {
        cli_errmsg("ooxml_parse_value: no memory for json value for [%s]\n", arrname);
        return CL_EMEM;
    }

    json_object_array_add(arrobj, newobj);
    return CL_SUCCESS;
}
static const char *ooxml_keys[] = {
"coreproperties",
"title",
"subject",
"creator",
"keywords",
"comments",
"description",
"lastmodifiedby",
"revision",
"created",
"modified",
"category",
"contentstatus",
"properties",
"application",
"appversion",
"characters",
"characterswithspaces",
"company",
"digsig",
"docsecurity",
//"headingpairs",
"hiddenslides",
"hlinks",
"hyperlinkbase",
"hyperlinkschanged",
"lines",
"linksuptodate",
"manager",
"mmclips",
"notes",
"pages",
"paragraphs",
"presentationformat",
"properties",
"scalecrop",
"shareddoc",
"slides",
"template",
//"titlesofparts",
"totaltime",
"words"
/* Should NOT Exist */
{ "bindata", "BinaryData", MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT }
};
/*
 * JSON names for recognised elements.  This table is parallel to
 * ooxml_keys: ooxml_check_key() returns ooxml_json_keys[i] for the element
 * name ooxml_keys[i], so both tables must keep the same order and length.
 */
static const char *ooxml_json_keys[] = {
"CoreProperties",
"Title",
"Subject",
"Author",
"Keywords",
"Comments",
"Description",
"LastAuthor",
"Revision",
"Created",
"Modified",
"Category",
"ContentStatus",
"ExtendedProperties",
"Application",
"AppVersion",
"Characters",
"CharactersWithSpaces",
"Company",
"DigSig",
"DocSecurity",
//"HeadingPairs",
"HiddenSlides",
"HLinks",
"HyperlinkBase",
"HyperlinksChanged",
"Lines",
"LinksUpToDate",
"Manager",
"MultimediaClips",
"Notes",
"Pages",
"Paragraphs",
"PresentationFormat",
"Properties",
"ScaleCrop",
"SharedDoc",
"Slides",
"Template",
//"TitlesOfParts",
"TotalTime",
"Words"
};
/* Number of table entries that ooxml_check_key() searches. */
static size_t num_ooxml_keys = 40; //42
/*
 * Translate an XML element name into its JSON name.
 *
 * The name is compared case-insensitively with each ooxml_keys entry of
 * exactly keylen characters; on a match the entry at the same index of
 * ooxml_json_keys is returned.  Returns NULL for unknown names and for
 * names of OOXML_JSON_STRLEN_MAX characters or more.
 */
static const char *ooxml_check_key(const char* key, size_t keylen)
{
    unsigned idx;

    if (keylen >= OOXML_JSON_STRLEN_MAX) {
        cli_dbgmsg("ooxml_check_key: key name too long\n");
        return NULL;
    }

    for (idx = 0; idx < num_ooxml_keys; idx++) {
        const char *candidate = ooxml_keys[idx];

        /* lengths must agree before the bounded comparison is meaningful */
        if (strlen(candidate) != keylen)
            continue;

        if (strncasecmp(key, candidate, keylen) == 0)
            return ooxml_json_keys[idx];
    }

    return NULL;
}
/**
 * Recursively convert the XML element at the reader's current position into a
 * JSON object attached to wrkptr.
 *
 * Elements whose local name is not recognised by ooxml_check_key() are skipped
 * together with their subtree. Attributes are stored in an "Attributes" child
 * object and text content is stored through ooxml_parse_value() under "Value".
 *
 * NOTE(review): check_state() is a macro defined outside this block; it
 * presumably returns from this function when the reader reports an error or
 * end of input - confirm against its definition.
 *
 * @param ctx    scan context, used for the JSON timeout check
 * @param reader libxml2 text reader positioned on an element node
 * @param wrkptr JSON object that receives the object created for this element
 * @param rlvl   current recursion depth (0 for the document element)
 * @param root   JSON object of the depth-0 element; ignored and replaced by
 *               this element's own object when rlvl is 0
 * @return CL_SUCCESS when the element was consumed or skipped, CL_EPARSE on a
 *         libxml2 failure, CL_EFORMAT on unexpected structure, CL_EMEM when
 *         the element's JSON object cannot be created, CL_ETIMEOUT when the
 *         timeout check fails, or whatever a nested call/ooxml_parse_value()
 *         returned.
 */
static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, json_object *root)
{
    const char *element_tag = NULL, *end_tag = NULL;
    const xmlChar *node_name = NULL, *node_value = NULL;
    json_object *thisjobj = NULL;
    int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0, state = 1;

    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);

    /* check recursion level */
    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
        cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n");
        cli_jsonbool(root, "HitRecursiveLimit", 1);
        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* acquire element type */
    node_type = xmlTextReaderNodeType(reader);
    if (node_type == -1)
        return CL_EPARSE;

    if (node_type != XML_READER_TYPE_ELEMENT) {
        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
        return CL_EFORMAT; /* first type is not an element */
    }

    node_name = xmlTextReaderConstLocalName(reader);
    if (!node_name) {
        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
        return CL_EPARSE; /* no name, nameless */
    }

    element_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
    if (!element_tag) {
        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
        /* skip it */
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }

    /* generate json object */
    thisjobj = cli_jsonobj(wrkptr, element_tag);
    if (!thisjobj) {
        return CL_EMEM;
    }
    cli_dbgmsg("ooxml_parse_element: generated json object [%s]\n", element_tag);

    /* the depth-0 element becomes the root that receives "HitRecursiveLimit" */
    if (rlvl == 0)
        root = thisjobj;

    /* handle attributes */
    state = xmlTextReaderHasAttributes(reader);
    if (state == 1) {
        json_object *attributes;

        attributes = cli_jsonobj(thisjobj, "Attributes");
        if (!attributes) {
            return CL_EPARSE;
        }
        cli_dbgmsg("ooxml_parse_element: retrieved json object [Attributes]\n");

        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
            const xmlChar *name, *value;

            name  = xmlTextReaderConstLocalName(reader);
            value = xmlTextReaderConstValue(reader);
            if (name == NULL || value == NULL) continue;

            cli_dbgmsg("%s: %s\n", name, value);
            /* xmlChar is unsigned char; cast both strings for the char-based JSON API */
            cli_jsonstr(attributes, (const char *)name, (const char *)value);
        }
    }
    else if (state == -1)
        return CL_EPARSE;

    /* an empty element (<tag/>) has no content and no end tag to wait for */
    state = xmlTextReaderIsEmptyElement(reader);
    if (state == 1) {
        state = xmlTextReaderNext(reader);
        check_state(state);
        return CL_SUCCESS;
    }
    else if (state == -1)
        return CL_EPARSE;

    /* advance to first content node */
    state = xmlTextReaderRead(reader);
    check_state(state);

    /* parse until the end element tag */
    while (!endtag) {
        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
            return CL_ETIMEOUT;
        }

        node_type = xmlTextReaderNodeType(reader);
        if (node_type == -1)
            return CL_EPARSE;

        switch (node_type) {
        case XML_READER_TYPE_ELEMENT:
            /* child element: the nested call advances the reader past it */
            ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1, root);
            if (ret != CL_SUCCESS) {
                return ret;
            }
            break;

        case XML_READER_TYPE_END_ELEMENT:
            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
            node_name = xmlTextReaderConstLocalName(reader);
            if (!node_name) {
                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
                return CL_EPARSE; /* no name, nameless */
            }

            end_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
            if (!end_tag) {
                cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
                return CL_EFORMAT; /* unrecognized element tag */
            }

            /* full comparison: a prefix-only match would accept a longer end tag */
            if (strcmp(element_tag, end_tag)) {
                cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
                return CL_EFORMAT;
            }

            /* advance to next element tag */
            state = xmlTextReaderRead(reader);
            check_state(state);

            endtag = 1;
            break;

        case XML_READER_TYPE_TEXT:
            node_value = xmlTextReaderConstValue(reader);

            ret = ooxml_parse_value(thisjobj, "Value", node_value);
            if (ret != CL_SUCCESS)
                return ret;

            cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);

            /* advance to next element tag */
            state = xmlTextReaderRead(reader);
            check_state(state);
            break;

        default:
#if OOXML_DEBUG
            node_name = xmlTextReaderConstLocalName(reader);
            node_value = xmlTextReaderConstValue(reader);

            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n", node_name, node_type, node_value);
#endif
            /* any other node type: skip it and stop processing this element */
            state = xmlTextReaderNext(reader);
            check_state(state);
            return CL_SUCCESS;
        }
    }

    return CL_SUCCESS;
}
static size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(struct key_entry);
static int ooxml_updatelimits(int fd, cli_ctx *ctx)
{
@ -433,12 +126,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx)
return CL_SUCCESS; // internal error from libxml2
}
/* move reader to first element */
if (xmlTextReaderRead(reader) != 1) {
return CL_SUCCESS; /* libxml2 failed */
}
ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, NULL);
ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1);
if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK)
cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n");
@ -455,9 +143,9 @@ static int ooxml_core_cb(int fd, cli_ctx *ctx)
cli_dbgmsg("in ooxml_core_cb\n");
ret = ooxml_parse_document(fd, ctx);
if (ret == CL_EPARSE)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER");
else if (ret == CL_EFORMAT)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED");
return ret;
}
@ -469,9 +157,9 @@ static int ooxml_extn_cb(int fd, cli_ctx *ctx)
cli_dbgmsg("in ooxml_extn_cb\n");
ret = ooxml_parse_document(fd, ctx);
if (ret == CL_EPARSE)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER");
else if (ret == CL_EFORMAT)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED");
return ret;
}
@ -499,7 +187,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
reader = xmlReaderForFd(fd, "[Content_Types].xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
if (reader == NULL) {
cli_dbgmsg("ooxml_content_cb: xmlReaderForFd error for ""[Content_Types].xml""\n");
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD");
ctx->scansize = sav_scansize;
ctx->scannedfiles = sav_scannedfiles;
@ -608,37 +296,37 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
if (core) {
cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core);
if (core > 1)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES");
}
else if (!mcore)
cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n");
if (mcore) {
cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", mcore);
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES");
}
if (extn) {
cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn);
if (extn > 1)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES");
}
else if (!mextn)
cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n");
if (mextn) {
cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", mextn);
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES");
}
if (cust) {
cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust);
if (cust > 1)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES");
}
else if (!mcust)
cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n");
if (mcust) {
cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", mcust);
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES");
}
if (dsig) {
@ -694,7 +382,7 @@ int cli_process_ooxml(cli_ctx *ctx)
uint32_t loff = 0;
int tmp = CL_SUCCESS;
cli_dbgmsg("in cli_processooxml\n");
cli_dbgmsg("in cli_process_ooxml\n");
if (!ctx) {
return CL_ENULLARG;
}
@ -702,35 +390,35 @@ int cli_process_ooxml(cli_ctx *ctx)
/* find "[Content_Types].xml" */
tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff);
if (tmp == CL_ETIMEOUT) {
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
return CL_ETIMEOUT;
}
else if (tmp != CL_VIRUS) {
cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
return CL_EFORMAT;
}
cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);
tmp = unzip_single_internal(ctx, loff, ooxml_content_cb);
if (tmp == CL_ETIMEOUT)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
else if (tmp == CL_EMEM)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
else if (tmp == CL_EMAXSIZE)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
else if (tmp == CL_EMAXFILES)
ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
return tmp;
#else
UNUSEDPARAM(ctx);
cli_dbgmsg("in cli_processooxml\n");
cli_dbgmsg("in cli_process_ooxml\n");
#if !HAVE_LIBXML2
cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!");
cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!\n");
#endif
#if !HAVE_JSON
cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!");
cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!\n");
#endif
return CL_SUCCESS;
#endif

View file

@ -770,7 +770,6 @@ struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine)
settings->cb_hash = engine->cb_hash;
settings->cb_meta = engine->cb_meta;
settings->cb_file_props = engine->cb_file_props;
settings->cb_file_props_data = engine->cb_file_props_data;
settings->engine_options = engine->engine_options;
settings->cb_stats_add_sample = engine->cb_stats_add_sample;
@ -843,7 +842,6 @@ int cl_engine_settings_apply(struct cl_engine *engine, const struct cl_settings
engine->cb_hash = settings->cb_hash;
engine->cb_meta = settings->cb_meta;
engine->cb_file_props = settings->cb_file_props;
engine->cb_file_props_data = settings->cb_file_props_data;
engine->cb_stats_add_sample = settings->cb_stats_add_sample;
engine->cb_stats_remove_sample = settings->cb_stats_remove_sample;
@ -1363,8 +1361,7 @@ void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback)
engine->cb_meta = callback;
}
void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback, void * cbdata)
void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback)
{
engine->cb_file_props = callback;
engine->cb_file_props_data = cbdata;
}

View file

@ -67,7 +67,7 @@
* in re-enabling affected modules.
*/
#define CL_FLEVEL 80
#define CL_FLEVEL 81
#define CL_FLEVEL_DCONF CL_FLEVEL
#define CL_FLEVEL_SIGTOOL CL_FLEVEL
@ -315,7 +315,6 @@ struct cl_engine {
clcb_hash cb_hash;
clcb_meta cb_meta;
clcb_file_props cb_file_props;
void *cb_file_props_data;
/* Used for bytecode */
struct cli_all_bc bcs;
@ -391,7 +390,6 @@ struct cl_settings {
clcb_hash cb_hash;
clcb_meta cb_meta;
clcb_file_props cb_file_props;
void *cb_file_props_data;
/* Engine max settings */
uint64_t maxembeddedpe; /* max size to scan MSEXE for PE */

View file

@ -59,6 +59,7 @@
#include "arc4.h"
#include "rijndael.h"
#include "textnorm.h"
#include "conv.h"
#include "json_api.h"
#ifdef CL_DEBUG
@ -746,7 +747,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
}
static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
{
unsigned char *key, *q, result[16];
unsigned n;
@ -846,7 +847,7 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
return (char *)q;
}
static enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
{
if (obj->flags & (1 << OBJ_EMBEDDED_FILE))
return pdf->enc_method_embeddedfile;
@ -2244,7 +2245,7 @@ static enum enc_method parse_enc_method(const char *dict, unsigned len, const ch
return ret;
}
static void pdf_handle_enc(struct pdf_struct *pdf)
void pdf_handle_enc(struct pdf_struct *pdf)
{
struct pdf_obj *obj;
uint32_t len, n, R, P, length, EM = 1, i, oulen;
@ -3214,8 +3215,12 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.author))
pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL);
if (!(pdf->stats.author)) {
pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.author))
return;
pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
}
}
#endif
@ -3230,8 +3235,12 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.creator))
pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL);
if (!(pdf->stats.creator)) {
pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.creator))
return;
pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
}
}
#endif
@ -3246,8 +3255,12 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.modificationdate))
pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL);
if (!(pdf->stats.modificationdate)) {
pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.modificationdate))
return;
pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
}
}
#endif
@ -3262,8 +3275,12 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.creationdate))
pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL);
if (!(pdf->stats.creationdate)) {
pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.creationdate))
return;
pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
}
}
#endif
@ -3278,8 +3295,12 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.producer))
pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL);
if (!(pdf->stats.producer)) {
pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.producer))
return;
pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
}
}
#endif
@ -3294,8 +3315,12 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.title))
pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL);
if (!(pdf->stats.title)) {
pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.title))
return;
pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
}
}
#endif
@ -3310,8 +3335,12 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.keywords))
pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL);
if (!(pdf->stats.keywords)) {
pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.keywords))
return;
pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
}
}
#endif
@ -3326,8 +3355,12 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
return;
if (!(pdf->stats.subject))
pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL);
if (!(pdf->stats.subject)) {
pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
if (!(pdf->stats.subject))
return;
pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
}
}
#endif
@ -3511,22 +3544,182 @@ static void pdf_export_json(struct pdf_struct *pdf)
goto cleanup;
}
if (pdf->stats.author)
cli_jsonstr(pdfobj, "Author", pdf->stats.author);
if (pdf->stats.creator)
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
if (pdf->stats.producer)
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
if (pdf->stats.modificationdate)
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
if (pdf->stats.creationdate)
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
if (pdf->stats.title)
cli_jsonstr(pdfobj, "Title", pdf->stats.title);
if (pdf->stats.subject)
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
if (pdf->stats.keywords)
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
if (pdf->stats.author) {
if (!pdf->stats.author->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length);
if (out) {
free(pdf->stats.author->data);
pdf->stats.author->data = out;
pdf->stats.author->meta.length = strlen(out);
pdf->stats.author->meta.success = 1;
}
}
if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) {
cli_jsonstr(pdfobj, "Author", pdf->stats.author->data);
} else if (pdf->stats.author->data && pdf->stats.author->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length);
cli_jsonstr(pdfobj, "Author", b64);
cli_jsonbool(pdfobj, "Author_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "Author", "");
}
}
if (pdf->stats.creator) {
if (!pdf->stats.creator->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length);
if (out) {
free(pdf->stats.creator->data);
pdf->stats.creator->data = out;
pdf->stats.creator->meta.length = strlen(out);
pdf->stats.creator->meta.success = 1;
}
}
if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) {
cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data);
} else if (pdf->stats.creator->data && pdf->stats.creator->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length);
cli_jsonstr(pdfobj, "Creator", b64);
cli_jsonbool(pdfobj, "Creator_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "Creator", "");
}
}
if (pdf->stats.producer) {
if (!pdf->stats.producer->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length);
if (out) {
free(pdf->stats.producer->data);
pdf->stats.producer->data = out;
pdf->stats.producer->meta.length = strlen(out);
pdf->stats.producer->meta.success = 1;
}
}
if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) {
cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data);
} else if (pdf->stats.producer->data && pdf->stats.producer->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length);
cli_jsonstr(pdfobj, "Producer", b64);
cli_jsonbool(pdfobj, "Producer_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "Producer", "");
}
}
if (pdf->stats.modificationdate) {
if (!pdf->stats.modificationdate->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
if (out) {
free(pdf->stats.modificationdate->data);
pdf->stats.modificationdate->data = out;
pdf->stats.modificationdate->meta.length = strlen(out);
pdf->stats.modificationdate->meta.success = 1;
}
}
if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) {
cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data);
} else if (pdf->stats.modificationdate->data && pdf->stats.modificationdate->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
cli_jsonstr(pdfobj, "ModificationDate", b64);
cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "ModificationDate", "");
}
}
if (pdf->stats.creationdate) {
if (!pdf->stats.creationdate->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
if (out) {
free(pdf->stats.creationdate->data);
pdf->stats.creationdate->data = out;
pdf->stats.creationdate->meta.length = strlen(out);
pdf->stats.creationdate->meta.success = 1;
}
}
if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) {
cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data);
} else if (pdf->stats.creationdate->data && pdf->stats.creationdate->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
cli_jsonstr(pdfobj, "CreationDate", b64);
cli_jsonbool(pdfobj, "CreationDate_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "CreationDate", "");
}
}
if (pdf->stats.title) {
if (!pdf->stats.title->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length);
if (out) {
free(pdf->stats.title->data);
pdf->stats.title->data = out;
pdf->stats.title->meta.length = strlen(out);
pdf->stats.title->meta.success = 1;
}
}
if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) {
cli_jsonstr(pdfobj, "Title", pdf->stats.title->data);
} else if (pdf->stats.title->data && pdf->stats.title->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length);
cli_jsonstr(pdfobj, "Title", b64);
cli_jsonbool(pdfobj, "Title_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "Title", "");
}
}
if (pdf->stats.subject) {
if (!pdf->stats.subject->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length);
if (out) {
free(pdf->stats.subject->data);
pdf->stats.subject->data = out;
pdf->stats.subject->meta.length = strlen(out);
pdf->stats.subject->meta.success = 1;
}
}
if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) {
cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data);
} else if (pdf->stats.subject->data && pdf->stats.subject->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length);
cli_jsonstr(pdfobj, "Subject", b64);
cli_jsonbool(pdfobj, "Subject_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "Subject", "");
}
}
if (pdf->stats.keywords) {
if (!pdf->stats.keywords->meta.success) {
char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
if (out) {
free(pdf->stats.keywords->data);
pdf->stats.keywords->data = out;
pdf->stats.keywords->meta.length = strlen(out);
pdf->stats.keywords->meta.success = 1;
}
}
if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) {
cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data);
} else if (pdf->stats.keywords->data && pdf->stats.keywords->meta.length) {
char *b64 = cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
cli_jsonstr(pdfobj, "Keywords", b64);
cli_jsonbool(pdfobj, "Keywords_base64", 1);
free(b64);
} else {
cli_jsonstr(pdfobj, "Keywords", "");
}
}
if (pdf->stats.ninvalidobjs)
cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
if (pdf->stats.njs)
@ -3589,6 +3782,8 @@ static void pdf_export_json(struct pdf_struct *pdf)
cli_jsonbool(pdfobj, "Encrypted", 1);
if (pdf->flags & (1 << DECRYPTABLE_PDF))
cli_jsonbool(pdfobj, "Decryptable", 1);
else
cli_jsonbool(pdfobj, "Decryptable", 0);
}
for (i=0; i < pdf->nobjs; i++) {
@ -3605,41 +3800,57 @@ static void pdf_export_json(struct pdf_struct *pdf)
cleanup:
if ((pdf->stats.author)) {
if (pdf->stats.author->data)
free(pdf->stats.author->data);
free(pdf->stats.author);
pdf->stats.author = NULL;
}
if (pdf->stats.creator) {
if (pdf->stats.creator->data)
free(pdf->stats.creator->data);
free(pdf->stats.creator);
pdf->stats.creator = NULL;
}
if (pdf->stats.producer) {
if (pdf->stats.producer->data)
free(pdf->stats.producer->data);
free(pdf->stats.producer);
pdf->stats.producer = NULL;
}
if (pdf->stats.modificationdate) {
if (pdf->stats.modificationdate->data)
free(pdf->stats.modificationdate->data);
free(pdf->stats.modificationdate);
pdf->stats.modificationdate = NULL;
}
if (pdf->stats.creationdate) {
if (pdf->stats.creationdate->data)
free(pdf->stats.creationdate->data);
free(pdf->stats.creationdate);
pdf->stats.creationdate = NULL;
}
if (pdf->stats.title) {
if (pdf->stats.title->data)
free(pdf->stats.title->data);
free(pdf->stats.title);
pdf->stats.title = NULL;
}
if (pdf->stats.subject) {
if (pdf->stats.subject->data)
free(pdf->stats.subject->data);
free(pdf->stats.subject);
pdf->stats.subject = NULL;
}
if (pdf->stats.keywords) {
if (pdf->stats.keywords->data)
free(pdf->stats.keywords->data);
free(pdf->stats.keywords);
pdf->stats.keywords = NULL;
}

View file

@ -62,6 +62,17 @@ struct pdf_dict {
struct pdf_dict_node *tail;
};
/*
 * Bookkeeping kept next to a document-information string.
 * Populated by pdf_parse_string.
 */
struct pdf_stats_metadata {
    int length;          /* number of bytes held in the entry's data buffer */
    struct pdf_obj *obj; /* object handed to pdf_finalize_string for this string */
    int success;         /* if finalize succeeds */
};

/* One document-information string (author, title, ...) plus its metadata. */
struct pdf_stats_entry {
    char *data;                     /* heap-allocated string bytes; may be NULL */
    struct pdf_stats_metadata meta; /* see struct pdf_stats_metadata */
};
struct pdf_stats {
int32_t ninvalidobjs; /* Number of invalid objects */
int32_t njs; /* Number of javascript objects */
@ -88,14 +99,14 @@ struct pdf_stats {
int32_t nrichmedia; /* Number of RichMedia objects */
int32_t nacroform; /* Number of AcroForm objects */
int32_t nxfa; /* Number of XFA objects */
char *author; /* Author of the PDF */
char *creator; /* Application used to create the PDF */
char *producer; /* Application used to produce the PDF */
char *creationdate; /* Date the PDF was created */
char *modificationdate; /* Date the PDF was modified */
char *title; /* Title of the PDF */
char *subject; /* Subject of the PDF */
char *keywords; /* Keywords of the PDF */
struct pdf_stats_entry *author; /* Author of the PDF */
struct pdf_stats_entry *creator; /* Application used to create the PDF */
struct pdf_stats_entry *producer; /* Application used to produce the PDF */
struct pdf_stats_entry *creationdate; /* Date the PDF was created */
struct pdf_stats_entry *modificationdate; /* Date the PDF was modified */
struct pdf_stats_entry *title; /* Title of the PDF */
struct pdf_stats_entry *subject; /* Subject of the PDF */
struct pdf_stats_entry *keywords; /* Keywords of the PDF */
};
@ -144,7 +155,12 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
int pdf_findobj(struct pdf_struct *pdf);
struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar);
void pdf_handle_enc(struct pdf_struct *pdf);
char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method);
enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj);
char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len);
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *stats);
struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
int is_object_reference(char *begin, char **endchar, uint32_t *id);

View file

@ -68,82 +68,33 @@
#include "rijndael.h"
#include "textnorm.h"
#include "json_api.h"
#include "conv.h"
char *pdf_convert_utf(char *begin, size_t sz);
char *pdf_convert_utf(char *begin, size_t sz)
{
char *res=NULL;
char *buf, *outbuf;
#if HAVE_ICONV
char *buf, *outbuf, *p1, *p2;
size_t sz2, inlen, outlen, i;
char *p1, *p2;
size_t inlen, outlen, i;
char *encodings[] = {
"UTF-16",
NULL
};
iconv_t cd;
#endif
buf = cli_calloc(1, sz);
buf = cli_calloc(1, sz+1);
if (!(buf))
return NULL;
memcpy(buf, begin, sz);
/* convert PDF specific escape sequences, like octal sequences */
sz2 = 0;
for (i = 0; i < sz; ++i) {
if ((i+1 < sz) && begin[i] == '\\') {
if ((i+3 < sz) &&
(isdigit(begin[i+1]) && isdigit(begin[i+2]) && isdigit(begin[i+3]))) {
/* octal sequence */
char octal[4], *check;
unsigned long value;
memcpy(octal, &begin[i+1], 3);
octal[3] = '\0';
value = (char)strtoul(octal, &check, 8);
/* check if all characters were converted */
if (check == &octal[3])
buf[sz2++] = value;
i += 3;
} else {
/* other sequences */
switch(begin[i+1]) {
case 'n':
buf[sz2++] = 0x0a;
break;
case 'r':
buf[sz2++] = 0x0d;
break;
case 't':
buf[sz2++] = 0x09;
break;
case 'b':
buf[sz2++] = 0x08;
break;
case 'f':
buf[sz2++] = 0x0c;
break;
case '(':
buf[sz2++] = 0x28;
break;
case ')':
buf[sz2++] = 0x29;
break;
case '\\':
buf[sz2++] = 0x5c;
break;
default:
/* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
break;
}
}
} else
buf[sz2++] = begin[i];
}
//memcpy(buf, begin, sz);
#if HAVE_ICONV
p1 = buf;
p2 = outbuf = cli_calloc(1, sz2+1);
p2 = outbuf = cli_calloc(1, sz+1);
if (!(outbuf)) {
free(buf);
return NULL;
@ -152,7 +103,7 @@ char *pdf_convert_utf(char *begin, size_t sz)
for (i=0; encodings[i] != NULL; i++) {
p1 = buf;
p2 = outbuf;
inlen = outlen = sz2;
inlen = outlen = sz;
cd = iconv_open("UTF-8", encodings[i]);
if (cd == (iconv_t)(-1)) {
@ -162,32 +113,31 @@ char *pdf_convert_utf(char *begin, size_t sz)
iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);
if (outlen == sz2) {
if (outlen == sz) {
/* Decoding unsuccessful right from the start */
iconv_close(cd);
continue;
}
outbuf[sz2 - outlen] = '\0';
outbuf[sz - outlen] = '\0';
res = strdup(outbuf);
iconv_close(cd);
break;
}
#else
outbuf = cli_utf16_to_utf8(buf, sz, UTF16_BOM);
if (!outbuf) {
free(buf);
return NULL;
}
res = strdup(outbuf);
#endif
free(buf);
free(outbuf);
return res;
#else
res = cli_calloc(sz+1, 1);
if ((res)) {
memcpy(res, begin, sz);
res[sz] = '\0';
}
return res;
#endif
}
int is_object_reference(char *begin, char **endchar, uint32_t *id)
@ -274,13 +224,154 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id)
return 0;
}
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar)
/*
 * Decrypt a string belonging to obj when the document is flagged as
 * decryptable.
 *
 * length is passed straight through to decrypt_any(). Returns whatever
 * decrypt_any() returns, or NULL when DECRYPTABLE_PDF is not set in
 * pdf->flags.
 */
static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, off_t *length)
{
    /* encryption setup (pdf_handle_enc) is handled only once in cli_pdf() */
    if (!(pdf->flags & (1 << DECRYPTABLE_PDF)))
        return NULL;

    return decrypt_any(pdf, obj->id, in, length, get_enc_method(pdf, obj));
}
/**
 * Post-process a raw PDF string: undo backslash escape sequences, decrypt the
 * result when the document is flagged ENCRYPTED_PDF, and run it through
 * pdf_convert_utf() when it contains a NUL byte or a byte above 0x7f.
 *
 * @param pdf PDF context; pdf->flags is consulted for ENCRYPTED_PDF
 * @param obj object the string belongs to; forwarded to pdf_decrypt_string()
 * @param in  raw string bytes (may contain embedded NUL bytes)
 * @param len number of bytes available at in
 * @return a newly allocated, NUL-terminated buffer owned by the caller, or
 *         NULL when in is NULL, an allocation fails, decryption yields
 *         nothing, or pdf_convert_utf() fails.
 */
char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)
{
    char *wrkstr, *output = NULL;
    size_t wrklen = len, outlen, i;
    unsigned int likelyutf = 0;

    if (!in)
        return NULL;

    /* get a working copy */
    wrkstr = cli_calloc(len+1, sizeof(char));
    if (!wrkstr)
        return NULL;
    memcpy(wrkstr, in, len);

    /* convert PDF specific escape sequences, like octal sequences */
    /* TODO: replace the escape sequences directly in the wrkstr */
    /* memchr, not strchr: the data may contain NUL bytes before an escape */
    if (memchr(wrkstr, '\\', wrklen)) {
        output = cli_calloc(wrklen+1, sizeof(char));
        if (!output) {
            free(wrkstr);
            return NULL;
        }

        outlen = 0;
        for (i = 0; i < wrklen; ++i) {
            if ((i+1 < wrklen) && wrkstr[i] == '\\') {
                if ((i+3 < wrklen) &&
                    (isdigit((unsigned char)wrkstr[i+1]) &&
                     isdigit((unsigned char)wrkstr[i+2]) &&
                     isdigit((unsigned char)wrkstr[i+3]))) {
                    /* octal sequence */
                    char octal[4], *check;
                    unsigned long value;

                    memcpy(octal, &wrkstr[i+1], 3);
                    octal[3] = '\0';

                    value = (char)strtoul(octal, &check, 8);
                    /* check if all characters were converted */
                    if (check == &octal[3])
                        output[outlen++] = value;
                    i += 3; /* 4 with for loop [\ddd] */
                } else {
                    /* other sequences */
                    switch(wrkstr[i+1]) {
                    case 'n':
                        output[outlen++] = 0x0a;
                        break;
                    case 'r':
                        output[outlen++] = 0x0d;
                        break;
                    case 't':
                        output[outlen++] = 0x09;
                        break;
                    case 'b':
                        output[outlen++] = 0x08;
                        break;
                    case 'f':
                        output[outlen++] = 0x0c;
                        break;
                    case '(':
                        output[outlen++] = 0x28;
                        break;
                    case ')':
                        output[outlen++] = 0x29;
                        break;
                    case '\\':
                        output[outlen++] = 0x5c;
                        break;
                    default:
                        /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
                        break;
                    }
                    i += 1; /* 2 with for loop [\c] */
                }
            } else {
                output[outlen++] = wrkstr[i];
            }
        }

        /*
         * Take ownership of the unescaped buffer instead of duplicating it as
         * a C string: it can hold embedded NUL bytes (e.g. from "\000"), and
         * it is already NUL-terminated because it was zero-filled with
         * wrklen+1 bytes and outlen never exceeds wrklen.
         */
        free(wrkstr);
        wrkstr = output;
        wrklen = outlen;
    }

    /* check for encryption and decrypt */
    if (pdf->flags & (1 << ENCRYPTED_PDF))
    {
        off_t tmpsz = (off_t)wrklen;
        output = pdf_decrypt_string(pdf, obj, wrkstr, &tmpsz);
        outlen = (size_t)tmpsz;
        free(wrkstr);
        if (output) {
            /* copy into a buffer with room for a terminating NUL */
            wrkstr = cli_calloc(outlen+1, sizeof(char));
            if (!wrkstr) {
                free(output);
                return NULL;
            }
            memcpy(wrkstr, output, outlen);
            free(output);
            wrklen = outlen;
        } else {
            return NULL;
        }
    }

    /* check for UTF-* and convert to UTF-8 */
    for (i = 0; i < wrklen; ++i) {
        if (((unsigned char)wrkstr[i] > (unsigned char)0x7f) || (wrkstr[i] == '\0')) {
            likelyutf = 1;
            break;
        }
    }

    if (likelyutf) {
        /* pdf_convert_utf() returns a fresh buffer (or NULL); drop the working copy */
        output = pdf_convert_utf(wrkstr, wrklen);
        free(wrkstr);
        wrkstr = output;
    }

    return wrkstr;
}
char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
{
const char *q = objstart;
char *p1, *p2;
size_t len, checklen;
char *res;
int likelyutf = 0;
char *res = NULL;
uint32_t objid;
size_t i;
@ -294,8 +385,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
* Fourth, Attempt to decode from UTF-* to UTF-8
*/
res = NULL;
if (str) {
checklen = strlen(str);
@ -407,26 +496,29 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
switch (*p3) {
case '(':
case '<':
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL);
free(begin);
res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
break;
default:
for (i=0; i < objsize2; i++) {
if (p3[i] >= 0x7f) {
likelyutf=1;
break;
}
}
res = likelyutf ? pdf_convert_utf(p3, objsize2) : NULL;
if (!(res)) {
res = begin;
res = pdf_finalize_string(pdf, obj, begin, objsize2);
if (!res) {
res = cli_calloc(1, objsize2+1);
if (!(res))
return NULL;
memcpy(res, begin, objsize2);
res[objsize2] = '\0';
} else {
free(begin);
if (meta) {
meta->length = objsize2;
meta->obj = obj;
meta->success = 0;
}
} else if (meta) {
meta->length = strlen(res);
meta->obj = obj;
meta->success = 1;
}
}
free(begin);
}
close(fd);
@ -471,9 +563,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
while (p2 < objstart + objsize) {
int shouldbreak=0;
if (!likelyutf && (*((unsigned char *)p2) > (unsigned char)0x7f || *p2 == '\0'))
likelyutf = 1;
switch (*p2) {
case '\\':
p2++;
@ -496,22 +585,25 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
len = (size_t)(p2 - p1) + 1;
if (likelyutf == 0) {
/* We're not UTF-*, so just make a copy of the string and return that */
res = pdf_finalize_string(pdf, obj, p1, len);
if (!res) {
res = cli_calloc(1, len+1);
if (!(res))
return NULL;
memcpy(res, p1, len);
res[len] = '\0';
if (endchar)
*endchar = p2;
return res;
if (meta) {
meta->length = len;
meta->obj = obj;
meta->success = 0;
}
} else if (meta) {
meta->length = strlen(res);
meta->obj = obj;
meta->success = 1;
}
res = pdf_convert_utf(p1, len);
if (res && endchar)
*endchar = p2;
@ -672,7 +764,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
switch (begin[0]) {
case '(':
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
begin = p1+2;
break;
case '[':
@ -688,7 +780,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
}
}
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
begin = p1+2;
break;
default:
@ -870,7 +962,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s
/* Not a dictionary. Intentionally fall through. */
case '(':
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin);
val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin, NULL);
begin += 2;
break;
case '[':

View file

@ -1642,7 +1642,7 @@ int cli_scanpe(cli_ctx *ctx)
if(exe_sections[0].rsz > CLI_MAX_ALLOCATION)
break;
if(!exe_sections[0].rsz)
if(exe_sections[0].rsz < 5)
break;
if(!(code=fmap_need_off_once(map, exe_sections[0].raw, exe_sections[0].rsz)))
break;
@ -2457,7 +2457,21 @@ int cli_scanpe(cli_ctx *ctx)
for(i = 0 ; i < nsections; i++) {
if(exe_sections[i].raw) {
if(!exe_sections[i].rsz || (unsigned int)fmap_readn(map, dest + exe_sections[i].rva - min, exe_sections[i].raw, exe_sections[i].ursz) != exe_sections[i].ursz) {
unsigned int r_ret;
if (!exe_sections[i].rsz)
goto out_no_petite;
if (!CLI_ISCONTAINED(dest, dsize,
dest + exe_sections[i].rva - min,
exe_sections[i].ursz))
goto out_no_petite;
r_ret = fmap_readn(map, dest + exe_sections[i].rva - min,
exe_sections[i].raw,
exe_sections[i].ursz);
if (r_ret != exe_sections[i].ursz) {
out_no_petite:
free(exe_sections);
free(dest);
return CL_CLEAN;

View file

@ -393,6 +393,11 @@ int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli
free(usects);
return 1;
}
if (backbytes >= INT_MAX / 2) {
free(usects);
cli_dbgmsg("Petite: probably invalid file\n");
return 1;
}
backbytes = backbytes*2 + oob;
if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
free(usects);
@ -409,6 +414,11 @@ int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli
free(usects);
return 1;
}
if (backbytes >= INT_MAX / 2) {
free(usects);
cli_dbgmsg("Petite: probably invalid file\n");
return 1;
}
backbytes = backbytes*2 + oob;
backsize--;
} while (backsize);

View file

@ -146,63 +146,63 @@ int cli_rebuildpe_align(char *buffer, struct cli_exe_section *sections, int sect
if(datasize > CLI_MAX_ALLOCATION)
return 0;
if((pefile = (char *) cli_calloc(rawbase+datasize, 1))) {
memcpy(pefile, HEADERS, 0x148);
pefile = (char *) cli_calloc(rawbase+datasize, 1);
if(!pefile)
return 0;
datasize = PESALIGN(rawbase, 0x1000);
memcpy(pefile, HEADERS, 0x148);
fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
fakepe->NumberOfSections = EC16(sects+gotghost);
fakepe->AddressOfEntryPoint = EC32(ep);
fakepe->ImageBase = EC32(base);
fakepe->SizeOfHeaders = EC32(rawbase);
memset(pefile+0x148, 0, 0x80);
cli_writeint32(pefile+0x148+0x10, ResRva);
cli_writeint32(pefile+0x148+0x14, ResSize);
curpe = pefile+0x148+0x80;
datasize = PESALIGN(rawbase, 0x1000);
if (gotghost) {
fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
fakepe->NumberOfSections = EC16(sects+gotghost);
fakepe->AddressOfEntryPoint = EC32(ep);
fakepe->ImageBase = EC32(base);
fakepe->SizeOfHeaders = EC32(rawbase);
memset(pefile+0x148, 0, 0x80);
cli_writeint32(pefile+0x148+0x10, ResRva);
cli_writeint32(pefile+0x148+0x14, ResSize);
curpe = pefile+0x148+0x80;
if (gotghost) {
snprintf(curpe, 8, "empty");
cli_writeint32(curpe+8, sections[0].rva-datasize); /* vsize */
cli_writeint32(curpe+12, datasize); /* rva */
cli_writeint32(curpe+0x24, 0xffffffff);
curpe+=40;
datasize+=PESALIGN(sections[0].rva-datasize, 0x1000);
}
}
for (i=0; i < sects; i++) {
for (i=0; i < sects; i++) {
snprintf(curpe, 8, ".clam%.2d", i+1);
if (!align) {
cli_writeint32(curpe+8, sections[i].vsz);
cli_writeint32(curpe+12, sections[i].rva);
cli_writeint32(curpe+16, sections[i].rsz);
cli_writeint32(curpe+20, rawbase);
cli_writeint32(curpe+8, sections[i].vsz);
cli_writeint32(curpe+12, sections[i].rva);
cli_writeint32(curpe+16, sections[i].rsz);
cli_writeint32(curpe+20, rawbase);
} else {
cli_writeint32(curpe+8, PESALIGN(sections[i].vsz, align));
cli_writeint32(curpe+12, PESALIGN(sections[i].rva, align));
cli_writeint32(curpe+16, PESALIGN(sections[i].rsz, align));
cli_writeint32(curpe+20, rawbase);
cli_writeint32(curpe+8, PESALIGN(sections[i].vsz, align));
cli_writeint32(curpe+12, PESALIGN(sections[i].rva, align));
cli_writeint32(curpe+16, PESALIGN(sections[i].rsz, align));
cli_writeint32(curpe+20, rawbase);
}
/* already zeroed
cli_writeint32(curpe+24, 0);
cli_writeint32(curpe+28, 0);
cli_writeint32(curpe+32, 0);
cli_writeint32(curpe+24, 0);
cli_writeint32(curpe+28, 0);
cli_writeint32(curpe+32, 0);
*/
cli_writeint32(curpe+0x24, 0xffffffff);
memcpy(pefile+rawbase, buffer+sections[i].raw, sections[i].rsz);
curpe+=40;
if (!align) {
rawbase+=PESALIGN(sections[i].rsz, 0x200);
datasize+=PESALIGN(sections[i].vsz, 0x1000);
rawbase+=PESALIGN(sections[i].rsz, 0x200);
datasize+=PESALIGN(sections[i].vsz, 0x1000);
} else {
rawbase+=PESALIGN(PESALIGN(sections[i].rsz, align), 0x200);
datasize+=PESALIGN(PESALIGN(sections[i].vsz, align), 0x1000);
rawbase+=PESALIGN(PESALIGN(sections[i].rsz, align), 0x200);
datasize+=PESALIGN(PESALIGN(sections[i].vsz, align), 0x1000);
}
}
fakepe->SizeOfImage = EC32(datasize);
} else {
return 0;
}
fakepe->SizeOfImage = EC32(datasize);
i = (cli_writen(file, pefile, rawbase)!=-1);
free(pefile);

View file

@ -105,6 +105,7 @@
#include "ooxml.h"
#include "xdp.h"
#include "json_api.h"
#include "msxml.h"
#ifdef HAVE_BZLIB_H
#include <bzlib.h>
@ -2212,6 +2213,12 @@ static int cli_scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_
case CL_TYPE_XDP:
ret = cli_scanxdp(ctx);
break;
case CL_TYPE_XML_WORD:
ret = cli_scanmsxml(ctx);
break;
case CL_TYPE_XML_XL:
ret = cli_scanmsxml(ctx);
break;
case CL_TYPE_RARSFX:
if(type != CL_TYPE_RAR && have_rar && SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) {
char *tmpname = NULL;
@ -2602,7 +2609,9 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
//type == CL_TYPE_ZIP ||
type == CL_TYPE_OOXML_WORD ||
type == CL_TYPE_OOXML_PPT ||
type == CL_TYPE_OOXML_XL) {
type == CL_TYPE_OOXML_XL ||
type == CL_TYPE_XML_WORD ||
type == CL_TYPE_XML_XL) {
ctx->properties = json_object_new_object();
if (NULL == ctx->properties) {
cli_errmsg("magic_scandesc: no memory for json properties object\n");
@ -2750,6 +2759,14 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
case CL_TYPE_IGNORED:
break;
case CL_TYPE_XML_WORD:
ret = cli_scanmsxml(ctx);
break;
case CL_TYPE_XML_XL:
ret = cli_scanmsxml(ctx);
break;
case CL_TYPE_XDP:
ret = cli_scanxdp(ctx);
break;
@ -3467,15 +3484,46 @@ static int scan_common(int desc, cl_fmap_t *map, const char **virname, unsigned
int ret = CL_SUCCESS;
cli_dbgmsg("%s\n", jstring);
/* Scan the json string unless a virus was detected */
if (rc != CL_VIRUS) {
ctx.options &= ~CL_SCAN_FILE_PROPERTIES;
rc = cli_mem_scandesc(jstring, strlen(jstring), &ctx);
/* run bytecode preclass hook; generate fmap if needed for running hook */
struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
if (!bc_ctx) {
cli_errmsg("scan_common: can't allocate memory for bc_ctx\n");
rc = CL_EMEM;
}
else {
fmap_t *pc_map = map;
if (!pc_map) {
perf_start(&ctx, PERFT_MAP);
if(!(pc_map = fmap(desc, 0, sb.st_size))) {
perf_stop(&ctx, PERFT_MAP);
rc = CL_EMEM;
}
perf_stop(&ctx, PERFT_MAP);
}
if (pc_map) {
cli_bytecode_context_setctx(bc_ctx, &ctx);
rc = cli_bytecode_runhook(&ctx, ctx.engine, bc_ctx, BC_PRECLASS, pc_map);
cli_bytecode_context_destroy(bc_ctx);
if (!map)
funmap(pc_map);
}
}
/* backwards compatibility: scan the json string unless a virus was detected */
if (rc != CL_VIRUS && ctx.engine->root[13]->ac_lsigs) {
cli_warnmsg("scan_common: running depeciated preclass bytecodes for target type 13\n");
ctx.options &= ~CL_SCAN_FILE_PROPERTIES;
rc = cli_mem_scandesc(jstring, strlen(jstring), &ctx);
}
}
/* Invoke file props callback */
if (ctx.engine->cb_file_props != NULL) {
ret = ctx.engine->cb_file_props(jstring, rc, ctx.engine->cb_file_props_data);
ret = ctx.engine->cb_file_props(jstring, rc, ctx.cb_ctx);
if (ret != CL_SUCCESS)
rc = ret;
}

View file

@ -690,3 +690,49 @@ char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type)
s2[j] = '\0';
return s2;
}
/*
 * Check whether the first `len` bytes of `buf` are structurally valid UTF-8.
 *
 * Every byte must be plain ASCII or belong to a multi-byte sequence made of
 * a lead byte followed by the number of 10xxxxxx continuation bytes that the
 * lead byte announces. Lead bytes announcing up to five continuation bytes
 * are accepted; code point values are not examined.
 *
 * Returns 1 when the whole buffer passes (including an empty buffer) and 0
 * on a stray continuation byte, a truncated sequence or an invalid lead byte.
 */
int cli_isutf8(const char *buf, unsigned int len)
{
    const unsigned char *bytes = (const unsigned char *)buf;
    unsigned int pos = 0;

    while (pos < len) {
        const unsigned char lead = bytes[pos++];
        unsigned int trailing;

        if (lead < 0x80)        /* 0xxxxxxx is plain ASCII */
            continue;

        if (lead < 0xc0)        /* 10xxxxxx never 1st byte */
            return 0;
        else if (lead < 0xe0)   /* 110xxxxx */
            trailing = 1;
        else if (lead < 0xf0)   /* 1110xxxx */
            trailing = 2;
        else if (lead < 0xf8)   /* 11110xxx */
            trailing = 3;
        else if (lead < 0xfc)   /* 111110xx */
            trailing = 4;
        else if (lead < 0xfe)   /* 1111110x */
            trailing = 5;
        else                    /* 0xfe and 0xff are never valid */
            return 0;

        for (; trailing > 0; trailing--, pos++) {
            /* sequence runs off the end of the buffer */
            if (pos >= len)
                return 0;
            /* every trailing byte must look like 10xxxxxx */
            if ((bytes[pos] & 0xc0) != 0x80)
                return 0;
        }
    }

    return 1;
}

View file

@ -64,5 +64,7 @@ typedef enum {
} utf16_type;
char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type);
int cli_isutf8(const char *buf, unsigned int len);
size_t cli_strlcat(char *dst, const char *src, size_t sz); /* libclamav/strlcat.c */
#endif

View file

@ -39,7 +39,7 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/stat.h>
#ifdef HAVE_UNISTD_H
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <time.h>
@ -49,68 +49,69 @@
#include "swf.h"
#include "clamav.h"
#include "scanners.h"
#include "lzma_iface.h"
#define EC16(v) le16_to_host(v)
#define EC32(v) le32_to_host(v)
#define EC16(v) le16_to_host(v)
#define EC32(v) le32_to_host(v)
#define INITBITS \
{ \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
bitpos = 8; \
bitbuf = (unsigned int) get_c; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: INITBITS: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
#define INITBITS \
{ \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
bitpos = 8; \
bitbuf = (unsigned int) get_c; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: INITBITS: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
}
#define GETBITS(v, n) \
{ \
getbits_n = n; \
bits = 0; \
while(getbits_n > bitpos) { \
getbits_n -= bitpos; \
bits |= bitbuf << getbits_n; \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
bitbuf = (unsigned int) get_c; \
bitpos = 8; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: GETBITS: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
} \
bitpos -= getbits_n; \
bits |= bitbuf >> bitpos; \
bitbuf &= 0xff >> (8 - bitpos); \
v = bits & 0xffff; \
#define GETBITS(v, n) \
{ \
getbits_n = n; \
bits = 0; \
while(getbits_n > bitpos) { \
getbits_n -= bitpos; \
bits |= bitbuf << getbits_n; \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
bitbuf = (unsigned int) get_c; \
bitpos = 8; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: GETBITS: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
} \
bitpos -= getbits_n; \
bits |= bitbuf >> bitpos; \
bitbuf &= 0xff >> (8 - bitpos); \
v = bits & 0xffff; \
}
#define GETWORD(v) \
{ \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
getword_1 = (unsigned int) get_c; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
getword_2 = (unsigned int) get_c; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
v = (uint16_t)(getword_1 & 0xff) | ((getword_2 & 0xff) << 8); \
#define GETWORD(v) \
{ \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
getword_1 = (unsigned int) get_c; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) { \
getword_2 = (unsigned int) get_c; \
offset += sizeof(get_c); \
} else { \
cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
return CL_EFORMAT; \
} \
v = (uint16_t)(getword_1 & 0xff) | ((getword_2 & 0xff) << 8); \
}
#define GETDWORD(v) \
{ \
GETWORD(getdword_1); \
GETWORD(getdword_2); \
v = (uint32_t)(getdword_1 | (getdword_2 << 16)); \
#define GETDWORD(v) \
{ \
GETWORD(getdword_1); \
GETWORD(getdword_2); \
v = (uint32_t)(getdword_1 | (getdword_2 << 16)); \
}
struct swf_file_hdr {
@ -119,30 +120,200 @@ struct swf_file_hdr {
uint32_t filesize;
};
static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
static int scanzws(cli_ctx *ctx, struct swf_file_hdr *hdr)
{
z_stream stream;
char inbuff[FILEBUFF], outbuff[FILEBUFF];
fmap_t *map = *ctx->fmap;
int offset = 8, ret, zret, outsize = 8, count, zend;
char *tmpname;
int fd;
struct CLI_LZMA lz;
unsigned char inbuff[FILEBUFF], outbuff[FILEBUFF];
fmap_t *map = *ctx->fmap;
/* strip off header */
off_t offset = 8;
uint32_t d_insize;
size_t outsize = 8;
int ret, lret, count;
char *tmpname;
int fd;
if((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
cli_errmsg("scancws: Can't generate temporary file\n");
return ret;
cli_errmsg("scanzws: Can't generate temporary file\n");
return ret;
}
hdr->signature[0] = 'F';
if(cli_writen(fd, hdr, sizeof(struct swf_file_hdr)) != sizeof(struct swf_file_hdr)) {
cli_errmsg("scancws: Can't write to file %s\n", tmpname);
cli_errmsg("scanzws: Can't write to file %s\n", tmpname);
close(fd);
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EWRITE;
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EWRITE;
}
/* read 4 bytes (for compressed 32-bit filesize) [not used for LZMA] */
if (fmap_readn(map, &d_insize, offset, sizeof(d_insize)) != sizeof(d_insize)) {
cli_errmsg("scanzws: Error reading SWF file\n");
close(fd);
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EREAD;
}
offset += sizeof(d_insize);
/* check if declared input size matches actual output size */
/* map->len = header (8 bytes) + d_insize (4 bytes) + flags (5 bytes) + compressed stream */
if (d_insize != (map->len - 17)) {
cli_warnmsg("SWF: declared input length != compressed stream size, %u != %llu\n",
d_insize, (long long unsigned)(map->len - 17));
} else {
cli_dbgmsg("SWF: declared input length == compressed stream size, %u == %llu\n",
d_insize, (long long unsigned)(map->len - 17));
}
/* first buffer required for initializing LZMA */
ret = fmap_readn(map, inbuff, offset, FILEBUFF);
if (ret < 0) {
cli_errmsg("scanzws: Error reading SWF file\n");
close(fd);
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
}
if (!ret)
return CL_EFORMAT; /* likely truncated */
offset += ret;
memset(&lz, 0, sizeof(lz));
lz.next_in = inbuff;
lz.next_out = outbuff;
lz.avail_in = ret;
lz.avail_out = FILEBUFF;
lret = cli_LzmaInit(&lz, hdr->filesize);
if (lret != LZMA_RESULT_OK) {
cli_errmsg("scanzws: LzmaInit() failed\n");
close(fd);
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
}
while (lret == LZMA_RESULT_OK) {
if (lz.avail_in == 0) {
lz.next_in = inbuff;
ret = fmap_readn(map, inbuff, offset, FILEBUFF);
if (ret < 0) {
cli_errmsg("scanzws: Error reading SWF file\n");
cli_LzmaShutdown(&lz);
close(fd);
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
}
if (!ret)
break;
lz.avail_in = ret;
offset += ret;
}
lret = cli_LzmaDecode(&lz);
count = FILEBUFF - lz.avail_out;
if (count) {
if (cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
break;
if (cli_writen(fd, outbuff, count) != count) {
cli_errmsg("scanzws: Can't write to file %s\n", tmpname);
cli_LzmaShutdown(&lz);
close(fd);
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EWRITE;
}
outsize += count;
}
lz.next_out = outbuff;
lz.avail_out = FILEBUFF;
}
cli_LzmaShutdown(&lz);
if (lret != LZMA_STREAM_END && lret != LZMA_RESULT_OK) {
/* outsize starts at 8, therefore, if its still 8, nothing was decompressed */
if (outsize == 8) {
cli_infomsg(ctx, "scanzws: Error decompressing SWF file. No data decompressed.\n");
close(fd);
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
}
cli_infomsg(ctx, "scanzws: Error decompressing SWF file. Scanning what was decompressed.\n");
}
cli_dbgmsg("SWF: Decompressed[LZMA] to %s, size %d\n", tmpname, outsize);
/* check if declared output size matches actual output size */
if (hdr->filesize != outsize) {
cli_warnmsg("SWF: declared output length != inflated stream size, %u != %llu\n",
hdr->filesize, (long long unsigned)outsize);
} else {
cli_dbgmsg("SWF: declared output length == inflated stream size, %u == %llu\n",
hdr->filesize, (long long unsigned)outsize);
}
ret = cli_magic_scandesc(fd, ctx);
close(fd);
if (!(ctx->engine->keeptmp)) {
if (cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
}
free(tmpname);
return ret;
}
static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
{
z_stream stream;
char inbuff[FILEBUFF], outbuff[FILEBUFF];
fmap_t *map = *ctx->fmap;
int offset = 8, ret, zret, outsize = 8, count, zend;
char *tmpname;
int fd;
if((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
cli_errmsg("scancws: Can't generate temporary file\n");
return ret;
}
hdr->signature[0] = 'F';
if(cli_writen(fd, hdr, sizeof(struct swf_file_hdr)) != sizeof(struct swf_file_hdr)) {
cli_errmsg("scancws: Can't write to file %s\n", tmpname);
close(fd);
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EWRITE;
}
stream.avail_in = 0;
@ -155,56 +326,56 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
zret = inflateInit(&stream);
if(zret != Z_OK) {
cli_errmsg("scancws: inflateInit() failed\n");
cli_errmsg("scancws: inflateInit() failed\n");
close(fd);
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
}
do {
if(stream.avail_in == 0) {
stream.next_in = (Bytef *)inbuff;
ret = fmap_readn(map, inbuff, offset, FILEBUFF);
if(ret < 0) {
cli_errmsg("scancws: Error reading SWF file\n");
close(fd);
if(cli_unlink(tmpname)) {
free(tmpname);
inflateEnd(&stream);
return CL_EUNLINK;
}
free(tmpname);
inflateEnd(&stream);
return CL_EUNPACK;
}
if(!ret)
break;
stream.avail_in = ret;
offset += ret;
}
zret = inflate(&stream, Z_SYNC_FLUSH);
count = FILEBUFF - stream.avail_out;
if(count) {
if(cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
break;
if(cli_writen(fd, outbuff, count) != count) {
cli_errmsg("scancws: Can't write to file %s\n", tmpname);
close(fd);
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EWRITE;
}
outsize += count;
}
stream.next_out = (Bytef *)outbuff;
stream.avail_out = FILEBUFF;
if(stream.avail_in == 0) {
stream.next_in = (Bytef *)inbuff;
ret = fmap_readn(map, inbuff, offset, FILEBUFF);
if(ret < 0) {
cli_errmsg("scancws: Error reading SWF file\n");
close(fd);
inflateEnd(&stream);
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EUNPACK;
}
if(!ret)
break;
stream.avail_in = ret;
offset += ret;
}
zret = inflate(&stream, Z_SYNC_FLUSH);
count = FILEBUFF - stream.avail_out;
if(count) {
if(cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
break;
if(cli_writen(fd, outbuff, count) != count) {
cli_errmsg("scancws: Can't write to file %s\n", tmpname);
inflateEnd(&stream);
close(fd);
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
free(tmpname);
return CL_EWRITE;
}
outsize += count;
}
stream.next_out = (Bytef *)outbuff;
stream.avail_out = FILEBUFF;
} while(zret == Z_OK);
zend = inflateEnd(&stream);
@ -226,16 +397,25 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
}
cli_infomsg(ctx, "scancws: Error decompressing SWF file. Scanning what was decompressed.\n");
}
cli_dbgmsg("SWF: Decompressed to %s, size %d\n", tmpname, outsize);
cli_dbgmsg("SWF: Decompressed[zlib] to %s, size %d\n", tmpname, outsize);
/* check if declared output size matches actual output size */
if (hdr->filesize != outsize) {
cli_warnmsg("SWF: declared output length != inflated stream size, %u != %llu\n",
hdr->filesize, (long long unsigned)outsize);
} else {
cli_dbgmsg("SWF: declared output length == inflated stream size, %u == %llu\n",
hdr->filesize, (long long unsigned)outsize);
}
ret = cli_magic_scandesc(fd, ctx);
close(fd);
if(!ctx->engine->keeptmp) {
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
if(cli_unlink(tmpname)) {
free(tmpname);
return CL_EUNLINK;
}
}
free(tmpname);
return ret;
@ -243,11 +423,11 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
static const char *tagname(tag_id id)
{
unsigned int i;
unsigned int i;
for(i = 0; tag_names[i].name; i++)
if(tag_names[i].id == id)
return tag_names[i].name;
if(tag_names[i].id == id)
return tag_names[i].name;
return NULL;
}
@ -265,19 +445,22 @@ int cli_scanswf(cli_ctx *ctx)
cli_dbgmsg("in cli_scanswf()\n");
if(fmap_readn(map, &file_hdr, offset, sizeof(file_hdr)) != sizeof(file_hdr)) {
cli_dbgmsg("SWF: Can't read file header\n");
return CL_CLEAN;
cli_dbgmsg("SWF: Can't read file header\n");
return CL_CLEAN;
}
offset += sizeof(file_hdr);
if(!strncmp(file_hdr.signature, "CWS", 3)) {
cli_dbgmsg("SWF: Compressed file\n");
return scancws(ctx, &file_hdr);
cli_dbgmsg("SWF: zlib compressed file\n");
return scancws(ctx, &file_hdr);
} else if(!strncmp(file_hdr.signature, "ZWS", 3)) {
cli_dbgmsg("SWF: LZMA compressed file\n");
return scanzws(ctx, &file_hdr);
} else if(!strncmp(file_hdr.signature, "FWS", 3)) {
cli_dbgmsg("SWF: Uncompressed file\n");
cli_dbgmsg("SWF: Uncompressed file\n");
} else {
cli_dbgmsg("SWF: Not a SWF file\n");
return CL_CLEAN;
cli_dbgmsg("SWF: Not a SWF file\n");
return CL_CLEAN;
}
cli_dbgmsg("SWF: Version: %u\n", file_hdr.version);
@ -306,62 +489,62 @@ int cli_scanswf(cli_ctx *ctx)
}
while(offset < map->len) {
GETWORD(tag_hdr);
tag_type = tag_hdr >> 6;
if(tag_type == 0)
break;
tag_len = tag_hdr & 0x3f;
if(tag_len == 0x3f)
GETDWORD(tag_len);
GETWORD(tag_hdr);
tag_type = tag_hdr >> 6;
if(tag_type == 0)
break;
tag_len = tag_hdr & 0x3f;
if(tag_len == 0x3f)
GETDWORD(tag_len);
pt = tagname(tag_type);
cli_dbgmsg("SWF: %s\n", pt ? pt : "UNKNOWN TAG");
cli_dbgmsg("SWF: Tag length: %u\n", tag_len);
if (tag_len > map->len) {
cli_dbgmsg("SWF: Invalid tag length.\n");
return CL_EFORMAT;
}
if ((offset + tag_len) < offset) {
cli_warnmsg("SWF: Tag length too large.\n");
break;
}
if(!pt) {
offset += tag_len;
continue;
}
pt = tagname(tag_type);
cli_dbgmsg("SWF: %s\n", pt ? pt : "UNKNOWN TAG");
cli_dbgmsg("SWF: Tag length: %u\n", tag_len);
if (tag_len > map->len) {
cli_dbgmsg("SWF: Invalid tag length.\n");
return CL_EFORMAT;
}
if ((offset + tag_len) < offset) {
cli_warnmsg("SWF: Tag length too large.\n");
break;
}
if(!pt) {
offset += tag_len;
continue;
}
switch(tag_type) {
case TAG_SCRIPTLIMITS: {
unsigned int recursion, timeout;
GETWORD(recursion);
GETWORD(timeout);
cli_dbgmsg("SWF: scriptLimits recursion %u timeout %u\n", recursion, timeout);
break;
}
switch(tag_type) {
case TAG_SCRIPTLIMITS: {
unsigned int recursion, timeout;
GETWORD(recursion);
GETWORD(timeout);
cli_dbgmsg("SWF: scriptLimits recursion %u timeout %u\n", recursion, timeout);
break;
}
case TAG_FILEATTRIBUTES:
GETDWORD(val);
cli_dbgmsg("SWF: File attributes:\n");
if(val & SWF_ATTR_USENETWORK)
cli_dbgmsg(" * Use network\n");
if(val & SWF_ATTR_RELATIVEURLS)
cli_dbgmsg(" * Relative URLs\n");
if(val & SWF_ATTR_SUPPRESSCROSSDOMAINCACHE)
cli_dbgmsg(" * Suppress cross domain cache\n");
if(val & SWF_ATTR_ACTIONSCRIPT3)
cli_dbgmsg(" * ActionScript 3.0\n");
if(val & SWF_ATTR_HASMETADATA)
cli_dbgmsg(" * Has metadata\n");
if(val & SWF_ATTR_USEDIRECTBLIT)
cli_dbgmsg(" * Use hardware acceleration\n");
if(val & SWF_ATTR_USEGPU)
cli_dbgmsg(" * Use GPU\n");
break;
case TAG_FILEATTRIBUTES:
GETDWORD(val);
cli_dbgmsg("SWF: File attributes:\n");
if(val & SWF_ATTR_USENETWORK)
cli_dbgmsg(" * Use network\n");
if(val & SWF_ATTR_RELATIVEURLS)
cli_dbgmsg(" * Relative URLs\n");
if(val & SWF_ATTR_SUPPRESSCROSSDOMAINCACHE)
cli_dbgmsg(" * Suppress cross domain cache\n");
if(val & SWF_ATTR_ACTIONSCRIPT3)
cli_dbgmsg(" * ActionScript 3.0\n");
if(val & SWF_ATTR_HASMETADATA)
cli_dbgmsg(" * Has metadata\n");
if(val & SWF_ATTR_USEDIRECTBLIT)
cli_dbgmsg(" * Use hardware acceleration\n");
if(val & SWF_ATTR_USEGPU)
cli_dbgmsg(" * Use GPU\n");
break;
default:
offset += tag_len;
continue;
}
default:
offset += tag_len;
continue;
}
}
return CL_CLEAN;

View file

@ -302,6 +302,8 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
loc_esi += 4;
cli_dbgmsg("Upack: ecx counter: %08x\n", j);
if (((uint64_t)count+j) * 4 > UINT_MAX)
return -1;
if (!CLI_ISCONTAINED(dest, dsize, loc_esi, (j*4)) || !CLI_ISCONTAINED(dest, dsize, loc_edi, ((j+count)*4)))
return -1;
for (;j--; loc_edi+=4, loc_esi+=4)
@ -359,6 +361,8 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
loc_edi += 4;
loc_ebx = loc_edi;
if (((uint64_t)count+6) * 4 > UINT_MAX)
return -1;
if (!CLI_ISCONTAINED(dest, dsize, loc_edi, ((6+count)*4)))
return -1;
cli_writeint32(loc_edi, 0xffffffff);
@ -432,6 +436,13 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
section.rsz = end_edi-loc_edi;
section.vsz = end_edi-loc_edi;
/* bb#11282 - prevent dest+va/dest from passing an invalid dereference to cli_rebuildpe */
/* check should trigger on broken PE files where the section exists outside of the file */
if ((!upack && ((va + section.rsz) > dsize)) || (upack && (section.rsz > dsize))) {
cli_dbgmsg("Upack: Rebuilt section exceeds allocated buffer; breaks cli_rebuildpe() bb#11282\n");
return 0;
}
if (!cli_rebuildpe(dest + (upack?0:va), &section, 1, base, original_ep, 0, 0, file)) {
cli_dbgmsg("Upack: Rebuilding failed\n");
return 0;

View file

@ -128,13 +128,13 @@ static int pefromupx (const char *src, uint32_t ssize, char *dst, uint32_t *dsiz
return 0;
while ((valign=magic[sectcnt++])) {
if ( ep - upx1 + valign <= ssize-5 && /* Wondering how we got so far?! */
if (CLI_ISCONTAINED(src, ssize - 5, src + ep - upx1 + valign - 2, 2) &&
src[ep - upx1 + valign - 2] == '\x8d' && /* lea edi, ... */
src[ep - upx1 + valign - 1] == '\xbe' ) /* ... [esi + offset] */
break;
}
if (!valign && ep - upx1 + 0x80 < ssize-8) {
if (!valign && CLI_ISCONTAINED(src, ssize - 8, src + ep - upx1 + 0x80, 8)) {
const char *pt = &src[ep - upx1 + 0x80];
cli_dbgmsg("UPX: bad magic - scanning for imports\n");

View file

@ -186,7 +186,8 @@ static int xar_get_toc_data_values(xmlTextReaderPtr reader, long *length, long *
cli_dbgmsg("cli_scanxar: <archived-checksum>:\n");
xar_get_checksum_values(reader, a_cksum, a_hash);
} else if (xmlStrEqual(name, (const xmlChar *)"extracted-checksum") &&
} else if ((xmlStrEqual(name, (const xmlChar *)"extracted-checksum") ||
xmlStrEqual(name, (const xmlChar *)"unarchived-checksum")) &&
xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
cli_dbgmsg("cli_scanxar: <extracted-checksum>:\n");
xar_get_checksum_values(reader, e_cksum, e_hash);
@ -561,11 +562,7 @@ int cli_scanxar(cli_ctx *ctx)
a_hash_ctx = xar_hash_init(a_hash, &a_sc, &a_mc);
if (a_hash_ctx == NULL)
goto exit_tmpfile;
e_hash_ctx = xar_hash_init(e_hash, &e_sc, &e_mc);
if (e_hash_ctx == NULL)
goto exit_tmpfile;
switch (encoding) {
case CL_TYPE_GZ:
@ -606,7 +603,8 @@ int cli_scanxar(cli_ctx *ctx)
bytes = sizeof(buff) - strm.avail_out;
xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
if (e_hash_ctx != NULL)
xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
if (cli_writen(fd, buff, bytes) < 0) {
cli_dbgmsg("cli_scanxar: cli_writen error file %s.\n", tmpname);
@ -627,7 +625,8 @@ int cli_scanxar(cli_ctx *ctx)
break;
avail_in -= strm.avail_in;
xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
if (a_hash_ctx != NULL)
xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
}
inflateEnd(&strm);
@ -665,7 +664,8 @@ int cli_scanxar(cli_ctx *ctx)
lz.next_in = blockp;
lz.avail_in = CLI_LZMA_HDR_SIZE;
xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);
if (a_hash_ctx != NULL)
xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);
lret = cli_LzmaInit(&lz, 0);
if (lret != LZMA_RESULT_OK) {
@ -716,8 +716,10 @@ int cli_scanxar(cli_ctx *ctx)
cli_dbgmsg("cli_scanxar: cli_LzmaDecode() produces no output for "
"avail_in %lu, avail_out %lu.\n", avail_in, avail_out);
xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);
xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);
if (a_hash_ctx != NULL)
xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);
if (e_hash_ctx != NULL)
xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);
/* Write a decompressed block. */
/* cli_dbgmsg("Writing %li bytes to LZMA decompress temp file, " */
@ -770,7 +772,8 @@ int cli_scanxar(cli_ctx *ctx)
goto exit_tmpfile;
}
xar_hash_update(a_hash_ctx, blockp, length, a_hash);
if (a_hash_ctx != NULL)
xar_hash_update(a_hash_ctx, blockp, length, a_hash);
if (cli_writen(fd, blockp, write_len) < 0) {
cli_dbgmsg("cli_scanxar: cli_writen error %li bytes @ %li.\n", length, at);
@ -782,25 +785,36 @@ int cli_scanxar(cli_ctx *ctx)
}
if (rc == CL_SUCCESS) {
xar_hash_final(a_hash_ctx, result, a_hash);
a_hash_ctx = NULL;
if (a_hash_ctx != NULL) {
xar_hash_final(a_hash_ctx, result, a_hash);
a_hash_ctx = NULL;
} else {
cli_dbgmsg("cli_scanxar: archived-checksum missing.\n");
cksum_fails++;
}
if (a_cksum != NULL) {
expected = cli_hex2str((char *)a_cksum);
if (xar_hash_check(a_hash, result, expected) != 0) {
cli_dbgmsg("cli_scanxar: archived-checksum missing or mismatch.\n");
cli_dbgmsg("cli_scanxar: archived-checksum mismatch.\n");
cksum_fails++;
} else {
cli_dbgmsg("cli_scanxar: archived-checksum matched.\n");
}
free(expected);
}
xar_hash_final(e_hash_ctx, result, e_hash);
e_hash_ctx = NULL;
if (e_hash_ctx != NULL) {
xar_hash_final(e_hash_ctx, result, e_hash);
e_hash_ctx = NULL;
} else {
cli_dbgmsg("cli_scanxar: extracted-checksum(unarchived-checksum) missing.\n");
cksum_fails++;
}
if (e_cksum != NULL) {
if (do_extract_cksum) {
expected = cli_hex2str((char *)e_cksum);
if (xar_hash_check(e_hash, result, expected) != 0) {
cli_dbgmsg("cli_scanxar: extracted-checksum missing or mismatch.\n");
cli_dbgmsg("cli_scanxar: extracted-checksum mismatch.\n");
cksum_fails++;
} else {
cli_dbgmsg("cli_scanxar: extracted-checksum matched.\n");

View file

@ -75,7 +75,7 @@ int cli_XzDecode(struct CLI_XZ *XZ) {
return XZ_STREAM_END;
if (XZ->status == CODER_STATUS_NOT_FINISHED && XZ->avail_out == 0)
return XZ_RESULT_OK;
if (res != SZ_OK)
if (((inbytes == 0) && (outbytes == 0)) || res != SZ_OK)
return XZ_RESULT_DATA_ERROR;
return XZ_RESULT_OK;
}

View file

@ -81,6 +81,7 @@ static int yc_poly_emulator(cli_ctx *ctx, char *base, unsigned int filesize, cha
unsigned char al;
unsigned char cl = ecx & 0xff;
unsigned int j,i;
unsigned int max_jmp_loop = 100000000;
for(i=0;i<ecx&&i<max_emu;i++) /* Byte looper - Decrypts every byte and write it back */
{
@ -103,6 +104,9 @@ static int yc_poly_emulator(cli_ctx *ctx, char *base, unsigned int filesize, cha
if (yc_bounds_check(ctx, base, filesize, decryptor_offset, j)) {
return 2;
}
if (!max_jmp_loop)
return 2;
max_jmp_loop--;
j = j + decryptor_offset[j];
break;

View file

@ -3,7 +3,7 @@ VERSION="devel-`date +%Y%m%d`"
dnl VERSION="1.0rc1"
LC_CURRENT=7
LC_REVISION=24
LC_REVISION=26
LC_AGE=1
LIBCLAMAV_VERSION="$LC_CURRENT":"$LC_REVISION":"$LC_AGE"
AC_SUBST([LIBCLAMAV_VERSION])

View file

@ -623,6 +623,7 @@ static Suite *test_cl_suite(void)
Suite *s = suite_create("cl_api");
TCase *tc_cl = tcase_create("cl_dup");
TCase *tc_cl_scan = tcase_create("cl_scan");
char *user_timeout = NULL;
int expect = expected_testfiles;
suite_add_tcase (s, tc_cl);
tcase_add_test(tc_cl, test_cl_free);
@ -661,6 +662,12 @@ static Suite *test_cl_suite(void)
tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_handle_allscan, 0, expect);
tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_mem, 0, expect);
tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_mem_allscan, 0, expect);
user_timeout = getenv("T");
if (user_timeout) {
int timeout = atoi(user_timeout);
tcase_set_timeout(tc_cl_scan, timeout);
}
#endif
return s;
}

View file

@ -357,6 +357,8 @@
<ClCompile Include="..\libclamav\mpool.c" />
<ClCompile Include="..\libclamav\msexpand.c" />
<ClCompile Include="..\libclamav\mspack.c" />
<ClCompile Include="..\libclamav\msxml.c" />
<ClCompile Include="..\libclamav\msxml_parser.c" />
<ClCompile Include="..\libclamav\nsis\bzlib.c">
<ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)\nsis_bzlib</ObjectFileName>
<ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)\nsis_bzlib</ObjectFileName>

View file

@ -201,6 +201,12 @@
<ClCompile Include="..\libclamav\mspack.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\msxml.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\msxml_parser.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\libclamav\ole2_extract.c">
<Filter>Source Files</Filter>
</ClCompile>

View file

@ -6,8 +6,8 @@
#define REPO_VERSION VERSION
#endif
#define RES_VER_Q 0,98,0,0
#define RES_VER_S "ClamAV 0.98"
#define RES_VER_Q 0,98,7,0
#define RES_VER_S "ClamAV 0.98.7"
VS_VERSION_INFO VERSIONINFO
FILEVERSION RES_VER_Q