Merge master to features/yara.

2025-11-11 13:31:04 +00:00 · 2015-05-01 18:36:48 -04:00 · 2015-05-01 18:36:48 -04:00 · a80453e6e9
commit a80453e6e9
parent 758e74cf66 7c05ec73ee
66 changed files with 3142 additions and 1048 deletions
--- a/COPYING.sha256
+++ b/COPYING.sha256
@ -1,23 +0,0 @@
- Copyright (c) 2001-2003 Allan Saddi <allan@saddi.com>
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
- 1. Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY ALLAN SADDI AND HIS CONTRIBUTORS ``AS IS''
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- ARE DISCLAIMED.  IN NO EVENT SHALL ALLAN SADDI OR HIS CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- POSSIBILITY OF SUCH DAMAGE.
--- a/203
+++ b/203
@ -1,3 +1,206 @@
+Mon, 27 Apr 12:00:00 EDT
+-----------------------------------
+ * 0.98.7 Release.
+
+Tue, 14 Apr 2015 15:53:17 EDT (klin)
+-----------------------------------
+ * bb#11296 - various fixes to pdf string base64 string conversion 
+
+Mon, 13 Apr 2015 12:14:41 EDT (smorgan)
+-----------------------------------
+ * bb11298 - look for TOC element name <unarchived-checksum> 
+   (as a synonym for <extracted-checksum>). Continue processing rather
+    than exit in the event of missing or error in TOC checksum specification.
+
+Wed, 8 Apr 2015 15:51:04 EDT (smorgan)
+-----------------------------------
+ * iso9660: remove unnecessary parameter on iso_parse_dir() and reset return
+   code when scanall is in effect. 
+
+Wed, 1 Apr 2015 17:41:59 EDT (klin)
+-----------------------------------
+ * pdf: correctly handle decoding, decryption, character set conversions,
+   and file properties collection(base64 encoded as needed).
+
+Fri, 27 Mar 2015 13:21:49 EDT (klin)
+-----------------------------------
+ * converted cb_file_props from using engine-based ctx to file-based ctx 
+
+Thu, 26 Mar 2015 12:24:02 EDT (smorgan)
+-----------------------------------
+ * bb11281 - Reworked reverted upack.c crash patch to fix regression
+   false negatives. 
+
+Tue, 24 Mar 2015 12:06:57 EDT (klin)
+-----------------------------------
+ * make check: added env check 'T' to set timeout 
+
+Mon, 23 Mar 2015 17:58:35 EDT (klin)
+-----------------------------------
+ * bb#11282 - patch for code clean up in rebuildpe. Patch
+   supplied by Sebastian Andrzej Siewior.
+
+Mon, 23 Mar 2015 13:04:54 EDT (klin)
+-----------------------------------
+ * bb#11284 - fixed integer underflow in detecting W32.Polipos.A method.
+   Patch supplied by Sebastian Andrzej Siewior.
+
+Mon, 16 Mar 2015 18:35:14 EDT (klin)
+-----------------------------------
+ * updated documentation on document property collection 
+
+Mon, 16 Mar 2015 18:26:07 EDT (klin)
+-----------------------------------
+ * added support for MS Office 2003 XML(msxml) document types and msxml
+   file properties collection. 
+
+Mon, 16 Mar 2015 13:11:56 EDT (klin)
+-----------------------------------
+ * fixed coverity issue ID 12109 buffer was not freed on rare error case 
+
+Mon, 16 Mar 2015 13:08:03 EDT (klin)
+-----------------------------------
+ * fixed coverity ID 12110 12111 changed the type of a value from unsigned
+  to signed due to possible negative values 
+
+Thu, 12 Mar 2015 19:06:23 EDT (smorgan)
+-----------------------------------
+ * Fix for infinite loop on crafted xz file. 
+
+Wed, 11 Mar 2015 15:03:43 EDT (smorgan)
+-----------------------------------
+ * bb11278 - was not detecting viruses on files inside iso9660.
+   Also fix up all-match logic. 
+
+Mon, 9 Mar 2015 13:02:25 EDT (smorgan)
+-----------------------------------
+ * bb11274 - adds out of bounds check for petite packed files.
+   Patch from Sebastian Andrzej Siewior. 
+
+Wed, 4 Mar 2015 14:04:24 EDT (klin)
+-----------------------------------
+ * updated example fileprop analysis bytecodes moved old example bytecodes
+   to examples/fileprop_analysis/old/ 
+
+Wed, 4 Mar 2015 12:08:34 EDT (klin)
+-----------------------------------
+ * backwards compatibility for target type 13 json scanning 
+
+Tue, 3 Mar 2015 17:47:55 EDT (klin)
+-----------------------------------
+ * generates fmap from desc if map is NULL 
+
+Tue, 3 Mar 2015 16:37:08 EDT (smorgan)
+-----------------------------------
+ * Apply y0da cryptor patch sent in by Sebastian Andrzej Siewior. 
+
+Tue, 3 Mar 2015 16:12:48 EDT (klin)
+-----------------------------------
+ * flevel updated to 80 (new bytecode hook type) 
+
+Tue, 3 Mar 2015 16:12:22 EDT (klin)
+-----------------------------------
+ * clambc info option updated for new hook type 
+
+Tue, 3 Mar 2015 15:00:41 EDT (klin)
+-----------------------------------
+ * added BC_PRECLASS hook support; replaces target type 13 
+
+Mon, 2 Mar 2015 19:06:23 EDT (klin)
+-----------------------------------
+ * pdf string UTF-16 conversion no longer solely depends on ICONV reason:
+   no ICONV meant no conversion even though conversion function existed 
+
+Fri, 27 Feb 2015 15:23:51 EDT (klin)
+-----------------------------------
+ * bb#11269 - bm matcher no longer sets scanning window offset reason:
+   certain segments could be hashed multiple times 
+
+Wed, 25 Feb 2015 14:55:21 EDT (klin)
+-----------------------------------
+ * bb#11269 - hash does not compute on segments smaller than the maxpatlen 
+
+Tue, 24 Feb 2015 16:21:09 EDT (klin)
+-----------------------------------
+ * bb#11267 - libclamav upx cover against hand crafted section ove patch
+   supplied by Sebastian Andrzej Siewior.
+
+Fri, 27 Feb 2015 16:57:19 EDT (smorgan)
+-----------------------------------
+ * Patch for integer overflow checks for petite unpack code supplied by
+   Sebastian Andrzej Siewior. 
+
+Fri, 27 Feb 2015 16:54:55 EDT (smorgan)
+-----------------------------------
+ * remove obsolete parameters from the clamd.conf man page: MailMaxRecursion,
+   ArchiveMaxFileSize, ArchiveMaxRecursion, ArchiveMaxFiles,
+   ArchiveMaxCompressionRatio, ArchiveBlockMax, ArchiveLimitMemoryUsage, Clamuko*. 
+
+Wed, 18 Feb 2015 15:23:54 EDT (klin)
+-----------------------------------
+ * bb#11212 - fix MEW unpacker 
+
+Mon, 16 Feb 2015 11:46:21 EDT (smorgan)
+-----------------------------------
+ * bb11264 - patch for 'possible' heap overflow submitted by the Debian team. 
+
+Tue, 10 Feb 2015 15:16:48 EDT (smorgan)
+-----------------------------------
+ * bb11260: fix compile error when './configure --disable-pthreads' is specified. 
+
+Fri, 6 Feb 2015 14:59:43 EDT (klin)
+-----------------------------------
+ * bb#11254 - removed built-in llvm configure check and added
+   --with-llvm-linking option to specify system-llvm linking method 
+
+Fri, 6 Feb 2015 13:22:35 EDT (klin)
+-----------------------------------
+ * improved documentation on macro subsignatures 
+
+Wed, 4 Feb 2015 18:52:01 EDT (smorgan)
+-----------------------------------
+ * fix documentation errors in example logical signature. 
+
+Fri, 30 Jan 2015 12:15:07 EDT (klin)
+-----------------------------------
+ * bb#12887 - fixed an issue regarding (fd==-1) in WinAPI 
+
+Wed, 28 Jan 2015 11:20:35 EDT (klin)
+-----------------------------------
+ * fixed Windows API SetOption/GetOption CLAM_LIMIT_RECURSION 
+
+Wed, 21 Jan 2015 11:41:07 EDT (klin)
+-----------------------------------
+ * added ICONV to clamconf optional features report 
+
+Thu, 15 Jan 2015 15:15:01 EDT (klin)
+-----------------------------------
+ * fixed an incorrect return value for magic_scandesc 
+
+Wed, 14 Jan 2015 09:25:47 EDT (klin)
+-----------------------------------
+ * cleaned up configure help strings by using AS_HELP_STRING 
+
+Mon, 12 Jan 2015 13:45:36 EDT (klin)
+-----------------------------------
+ * bb#11238 - added missing PDF preclass operations
+   > added whitespace fix for indirect references strings
+   > added PDF escape sequence handling (including octal) 
+
+Thu, 8 Jan 2015 09:48:20 EDT (klin)
+-----------------------------------
+ * bb#11237 - fixed bug in building CUD file 
+
+Wed, 7 Jan 2015 04:46:15 EDT (smorgan)
+-----------------------------------
+ * bb11233 - fix a strange bus error on Mac OS X PPC when using debug mode. 
+
+Mon, 22 Dec 2014 12:13:38 EDT (klin)
+-----------------------------------
+ * bb#11226 - fixed gpt GUID debugging message 
+
+ *** End of 0.98.6, Start of 0.98.7
+

 Tue Dec 16 16:21:40 2014 EDT (swebb)
 -------------------------------------
--- a/65
+++ b/65
@ -1,36 +1,45 @@
-0.98.6
+0.98.7
 ------

-ClamAV 0.98.6 is a bug fix release correcting the following:
+ClamAV 0.98.7 is here! This release contains new scanning features
+and bug fixes. 

-    - library shared object revisions.
-    - installation issues on some Mac OS X and FreeBSD platforms.
-    - includes a patch from Sebastian Andrzej Siewior making
-      ClamAV pid files compatible with systemd.
-    - Fix a heap out of bounds condition with crafted Yoda's
-      crypter files. This issue was discovered by Felix Groebert
-      of the Google Security Team.
-    - Fix a heap out of bounds condition with crafted mew packer
-      files. This issue was discovered by Felix Groebert of the
-      Google Security Team.
-    - Fix a heap out of bounds condition with crafted upx packer
-      files. This issue was discovered by Kevin Szkudlapski of
-      Quarkslab.
-    - Fix a heap out of bounds condition with crafted upack packer
-      files. This issue was discovered by Sebastian Andrzej Siewior.
-      CVE-2014-9328.
-    - Compensate a crash due to incorrect compiler optimization when
-      handling crafted petite packer files. This issue was discovered
-      by Sebastian Andrzej Siewior.
-      
-Thanks to the following ClamAV community members for code submissions
-and bug reporting included in ClamAV 0.98.6:
+    - Improvements to PDF processing: decryption, escape sequence
+      handling, and file property collection.
+    - Scanning/analysis of additional Microsoft Office 2003 XML format.
+    - Fix infinite loop condition on crafted y0da cryptor file. Identified
+      and patch suggested by Sebastian Andrzej Siewior. CVE-2015-2221.
+    - Fix crash on crafted petite packed file. Reported and patch
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2222.
+    - Fix false negatives on files within iso9660 containers. This issue
+      was reported by Minzhuan Gong.
+    - Fix a couple crashes on crafted upack packed file. Identified and
+      patches supplied by Sebastian Andrzej Siewior.
+    - Fix a crash during algorithmic detection on crafted PE file.
+      Identified and patch supplied by Sebastian Andrzej Siewior.
+    - Fix an infinite loop condition on a crafted "xz" archive file.
+      This was reported by Dimitri Kirchner and Goulven Guiheux.
+      CVE-2015-2668.
+    - Fix compilation error after ./configure --disable-pthreads.
+      Reported and fix suggested by John E. Krokes.
+    - Apply upstream patch for possible heap overflow in Henry Spencer's 
+      regex library. CVE-2015-2305.
+    - Fix crash in upx decoder with crafted file. Discovered and patch
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2170.
+    - Fix segfault scanning certain HTML files. Reported with sample by
+      Kai Risku.
+    - Improve detections within xar/pkg files.
+
+As always, we appreciate contributions of bug reports, code fixes,
+and sample submission from the ClamAV community members:

 Sebastian Andrzej Siewior
-Felix Groebert
-Kevin Szkudlapski
-Mark Pizzolato
-Daniel J. Luke
+Minzhuan Gong
+Dimitri Kirchner
+Goulven Guiheux
+John E. Krokes
+Kai Risku
+

 --
 The ClamAV team (http://www.clamav.net/about.html#credits)
--- a/42
+++ b/42
@ -2,6 +2,48 @@ Note: This README/NEWS file refers to the source tarball. Some things described
 here may not be available in binary packages.
 --

+0.98.7
+------
+
+ClamAV 0.98.7 is here! This release contains new scanning features
+and bug fixes. 
+
+    - Improvements to PDF processing: decryption, escape sequence
+      handling, and file property collection.
+    - Scanning/analysis of additional Microsoft Office 2003 XML format.
+    - Fix infinite loop condition on crafted y0da cryptor file. Identified
+      and patch suggested by Sebastian Andrzej Siewior. CVE-2015-2221.
+    - Fix crash on crafted petite packed file. Reported and patch
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2222.
+    - Fix false negatives on files within iso9660 containers. This issue
+      was reported by Minzhuan Gong.
+    - Fix a couple crashes on crafted upack packed file. Identified and
+      patches supplied by Sebastian Andrzej Siewior.
+    - Fix a crash during algorithmic detection on crafted PE file.
+      Identified and patch supplied by Sebastian Andrzej Siewior.
+    - Fix an infinite loop condition on a crafted "xz" archive file.
+      This was reported by Dimitri Kirchner and Goulven Guiheux.
+      CVE-2015-2668.
+    - Fix compilation error after ./configure --disable-pthreads.
+      Reported and fix suggested by John E. Krokes.
+    - Apply upstream patch for possible heap overflow in Henry Spencer's 
+      regex library. CVE-2015-2305.
+    - Fix crash in upx decoder with crafted file. Discovered and patch
+      supplied by Sebastian Andrzej Siewior. CVE-2015-2170.
+    - Fix segfault scanning certain HTML files. Reported with sample by
+      Kai Risku.
+    - Improve detections within xar/pkg files.
+
+As always, we appreciate contributions of bug reports, code fixes,
+and sample submission from the ClamAV community members:
+
+Sebastian Andrzej Siewior
+Minzhuan Gong
+Dimitri Kirchner
+Goulven Guiheux
+John E. Krokes
+Kai Risku
+
 0.98.6
 ------

--- a/4
+++ b/4
@ -28291,7 +28291,7 @@ fi
 if test "x$XML_LIBS" = "x"; then


-   $as_echo_n "              dmg and xar : "
+   $as_echo_n "              libxml2     : "
   if test "x" = "xno"; then :
  $as_echo "no (disabled)"
 elif test "x" = "xyes"; then :
@ -28305,7 +28305,7 @@ fi
 else


-   $as_echo_n "              dmg and xar : "
+   $as_echo_n "              libxml2     : "
   if test "x" = "xno"; then :
  $as_echo "yes, from $XML_HOME (disabled)"
 elif test "x" = "xyes"; then :
--- a/configure.ac
+++ b/configure.ac
@ -241,9 +241,9 @@ else
    CL_MSG_STATUS([pcre        ],[$PCRE_HOME],[$have_pcre])
 fi
 if test "x$XML_LIBS" = "x"; then 
-    CL_MSG_STATUS([dmg and xar ],[no],[])
+    CL_MSG_STATUS([libxml2     ],[no],[])
 else
-    CL_MSG_STATUS([dmg and xar ],[yes, from $XML_HOME],[])
+    CL_MSG_STATUS([libxml2     ],[yes, from $XML_HOME],[])
 fi

 # Yep, downgrading the compiler avoids the bug too:
--- a/docs/ClamAV_Document_Properties.xlsx
+++ b/docs/ClamAV_Document_Properties.xlsx
--- a/docs/man/clamd.conf.5.in
+++ b/docs/man/clamd.conf.5.in
@ -379,9 +379,6 @@ Scan RFC1341 messages split over many emails. You will need to periodically clea
 .br
 Default: no
 .TP
-\fBMailMaxRecursion (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxRecursion\fR.
-.TP 
 \fBPhishingSignatures BOOL\fR
 With this option enabled ClamAV will try to detect phishing attempts by using signatures.
 .br 
@ -488,24 +485,6 @@ This option causes memory or nested map scans to dump the content to disk.
 If you turn on this option, more data is written to disk and is available when the leave-temps option is enabled at the cost of more disk writes.
 .br
 Default: no
-.TP 
-\fBArchiveMaxFileSize (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxFileSize\fR and \fBMaxScanSize\fR.
-.TP 
-\fBArchiveMaxRecursion (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxRecursion\fR.
-.TP 
-\fBArchiveMaxFiles (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBMaxFiles\fR.
-.TP 
-\fBArchiveMaxCompressionRatio (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
-.TP 
-\fBArchiveBlockMax (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
-.TP 
-\fBArchiveLimitMemoryUsage (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
 .br 
 Default: no
 .TP 
@ -637,33 +616,6 @@ WARNING: setting this limit too high or disabling it may severely impact perform
 .br
 Default: 25M
 .TP
-\fBClamukoScanOnAccess (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBScanOnAccess\fR.
-.TP 
-\fBClamukoScannerCount (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
-.TP 
-\fBClamukoScanOnOpen (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
-.TP 
-\fBClamukoScanOnClose (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
-.TP 
-\fBClamukoScanOnExec (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted.
-.TP 
-\fBClamukoIncludePath (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessIncludePath\fR.
-.TP 
-\fBClamukoExcludePath (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessExcludePath\fR.
-.TP
-\fBClamukoExcludeUID (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessExcludeUID\fR.
-.TP 
-\fBClamukoMaxFileSize (OBSOLETE)\fR
-\fBWARNING:\fR This option is no longer accepted. See \fBOnAccessMaxFileSize\fR.
-.TP
 \fBScanOnAccess BOOL\fR
 This option enables on-access scanning (Linux only)
 .br
--- a/examples/fileprop_analysis/analysis.cud
+++ b/examples/fileprop_analysis/analysis.cud
--- a/examples/fileprop_analysis/embedpe_sample.c
+++ b/examples/fileprop_analysis/embedpe_sample.c
@ -1,26 +1,15 @@
 VIRUSNAME_PREFIX("SUBMIT.contains")
 VIRUSNAMES("EmbedPE")

-/* Target type is 13, internal JSON properties */
-TARGET(13)
+/* Target type is 0, all relevant files */
+TARGET(0)
+
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
+PRECLASS_HOOK_DECLARE

 /* JSON API call will require FUNC_LEVEL_098_5 = 78 */
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
-
-SIGNATURES_DECL_BEGIN
-DECLARE_SIGNATURE(sig1)
-SIGNATURES_DECL_END
-
-SIGNATURES_DEF_BEGIN
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
-/* this can be readjusted for specific filetypes */
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
-SIGNATURES_END
-
-bool logical_trigger(void)
-{
-    return matches(Signatures.sig1);
-}
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)

 #define STR_MAXLEN 256

--- a/examples/fileprop_analysis/ftype_sample.c
+++ b/examples/fileprop_analysis/ftype_sample.c
@ -3,26 +3,15 @@ VIRUSNAMES("CL_TYPE_MSWORD", "CL_TYPE_MSPPT", "CL_TYPE_MSXL",
           "CL_TYPE_OOXML_WORD", "CL_TYPE_OOXML_PPT", "CL_TYPE_OOXML_XL",
           "CL_TYPE_MSEXE", "CL_TYPE_PDF", "CL_TYPE_MSOLE2", "CL_TYPE_UNKNOWN", "InActive")

-/* Target type is 13, internal JSON properties */
-TARGET(13)
+/* Target type is 0, all relevant files */
+TARGET(0)
+
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
+PRECLASS_HOOK_DECLARE

 /* JSON API call will require FUNC_LEVEL_098_5 = 78 */
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
-
-SIGNATURES_DECL_BEGIN
-DECLARE_SIGNATURE(sig1)
-SIGNATURES_DECL_END
-
-SIGNATURES_DEF_BEGIN
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
-/* this can be readjusted for specific filetypes */
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
-SIGNATURES_END
-
-bool logical_trigger(void)
-{
-    return matches(Signatures.sig1);
-}
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)

 #define STR_MAXLEN 256

--- a/examples/fileprop_analysis/notpdf_sample.c
+++ b/examples/fileprop_analysis/notpdf_sample.c
@ -1,34 +1,51 @@
 VIRUSNAME_PREFIX("SUBMIT.NotPDF")
 VIRUSNAMES("InActive", "Submit")

-/* Target type is 13, internal JSON properties */
-TARGET(13)
+/* Target type is 0, all relevant files */
+TARGET(0)
+
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
+PRECLASS_HOOK_DECLARE

 /* JSON API call will require FUNC_LEVEL_098_5 = 78 */
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
-
-SIGNATURES_DECL_BEGIN
-DECLARE_SIGNATURE(sig1)
-DECLARE_SIGNATURE(sig2)
-SIGNATURES_DECL_END
-
-SIGNATURES_DEF_BEGIN
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
-/* this can be readjusted for specific filetypes */
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
-/* search '"RootFileType": "CL_TYPE_PDF"' */
-DEFINE_SIGNATURE(sig2, "22526f6f7446696c6554797065223a2022434c5f545950455f50444622")
-SIGNATURES_END
-
-bool logical_trigger(void)
-{
-    return matches(Signatures.sig1) && !matches(Signatures.sig2);
-}
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)

 #define STR_MAXLEN 256

 int entrypoint ()
 {
-    foundVirus("Submit");
+    int32_t type, obj, strlen;
+    char str[STR_MAXLEN];
+
+    /* check if json is available; return -1 without alerting when inactive */
+    if (!json_is_active()) {
+        return -1;
+    }
+
+    /* acquire the "FileType" property object; give up on a non-positive handle */
+    obj = json_get_object("FileType", 8, 0);
+    if (obj <= 0) return -1;
+
+    /* acquire and check type: only a string value is compared below */
+    type = json_get_type(obj);
+    if (type == JSON_TYPE_STRING) {
+        /* acquire string length, note +1 is for the NUL terminator */
+        strlen = json_get_string_length(obj)+1;
+        /* clamp to the buffer size to prevent buffer overflow */
+        if (strlen > STR_MAXLEN)
+            strlen = STR_MAXLEN;
+        /* acquire string data, note strlen includes the NUL terminator */
+        if (json_get_string(str, strlen, obj)) {
+            /* debug print str (with '\n' and prepended message) */
+            debug_print_str(str,strlen);
+
+            /* alert unless the type is exactly "CL_TYPE_PDF" (11 chars + NUL) */
+            if (strlen != 12 || memcmp(str, "CL_TYPE_PDF", 12) != 0) {
+                foundVirus("Submit");
+            }
+        }
+    }
+
    return 0;
 }
--- a/examples/fileprop_analysis/old/analysis.cud
+++ b/examples/fileprop_analysis/old/analysis.cud
--- a/examples/fileprop_analysis/old/embedpe_sample.c
+++ b/examples/fileprop_analysis/old/embedpe_sample.c
@ -0,0 +1,84 @@
+VIRUSNAME_PREFIX("SUBMIT.contains")
+VIRUSNAMES("EmbedPE")
+
+/* Target type is 13, internal JSON properties */
+TARGET(13)
+
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
+
+SIGNATURES_DECL_BEGIN
+DECLARE_SIGNATURE(sig1)
+SIGNATURES_DECL_END
+
+SIGNATURES_DEF_BEGIN
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
+/* this can be readjusted for specific filetypes */
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
+SIGNATURES_END
+
+bool logical_trigger(void)
+{
+    return matches(Signatures.sig1); /* true only when sig1 (CLAMJSON magic at offset 0) matched */
+}
+
+#define STR_MAXLEN 256
+
+int entrypoint ()
+{
+    int i;
+    int32_t type, obj, objarr, objit, arrlen, strlen;
+    char str[STR_MAXLEN];
+
+    /* check if json is available; return -1 without alerting when inactive */
+    if (!json_is_active()) {
+        return -1;
+    }
+
+    /* acquire the "ContainedObjects" property, expected to be an array */
+    objarr = json_get_object("ContainedObjects", 16, 0);
+    type = json_get_type(objarr);
+    /* debug print uint (no '\n' or prepended message) */
+    debug_print_uint(type);
+
+    if (type != JSON_TYPE_ARRAY) {
+        return -1;
+    }
+
+    /* check array length for iteration over elements */
+    arrlen = json_get_array_length(objarr);
+    for (i = 0; i < arrlen; ++i) {
+        /* acquire json object @ idx i; skip elements with a non-positive handle */
+        objit = json_get_array_idx(i, objarr);
+        if (objit <= 0) continue;
+
+        /* acquire FileType object of the array element @ idx i */
+        obj = json_get_object("FileType", 8, objit);
+        if (obj <= 0) continue;
+
+        /* acquire and check type: only string values are compared below */
+        type = json_get_type(obj);
+        if (type == JSON_TYPE_STRING) {
+            /* acquire string length, note +1 is for the NUL terminator */
+            strlen = json_get_string_length(obj)+1;
+            /* clamp to the buffer size to prevent buffer overflow */
+            if (strlen > STR_MAXLEN)
+                strlen = STR_MAXLEN;
+            /* acquire string data, note strlen includes the NUL terminator */
+            if (json_get_string(str, strlen, obj)) {
+                /* debug print str (with '\n' and prepended message) */
+                debug_print_str(str,strlen);
+
+                /* match exactly "CL_TYPE_MSEXE" (13 chars + NUL) */
+                if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
+                //if (!strcmp(str, strlen, "CL_TYPE_MSEXE", strlen)) {
+                    /* alert for submission and stop at the first embedded PE */
+                    foundVirus("EmbedPE");
+                    return 0;
+                }
+            }
+        }
+    }
+
+    return 0;
+}
--- a/examples/fileprop_analysis/old/ftype_sample.c
+++ b/examples/fileprop_analysis/old/ftype_sample.c
@ -0,0 +1,104 @@
+VIRUSNAME_PREFIX("SUBMIT.filetype")
+VIRUSNAMES("CL_TYPE_MSWORD", "CL_TYPE_MSPPT", "CL_TYPE_MSXL",
+           "CL_TYPE_OOXML_WORD", "CL_TYPE_OOXML_PPT", "CL_TYPE_OOXML_XL",
+           "CL_TYPE_MSEXE", "CL_TYPE_PDF", "CL_TYPE_MSOLE2", "CL_TYPE_UNKNOWN", "InActive")
+
+/* Target type is 13, internal JSON properties */
+TARGET(13)
+
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
+
+SIGNATURES_DECL_BEGIN
+DECLARE_SIGNATURE(sig1)
+SIGNATURES_DECL_END
+
+SIGNATURES_DEF_BEGIN
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
+/* this can be readjusted for specific filetypes */
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
+SIGNATURES_END
+
+bool logical_trigger(void)
+{
+    return matches(Signatures.sig1); /* true only when sig1 (CLAMJSON magic at offset 0) matched */
+}
+
+#define STR_MAXLEN 256
+
+int entrypoint ()
+{
+    int32_t objid, type, strlen;
+    char str[STR_MAXLEN];
+
+    /* check if json is available, alerts on inactive (optional); NOTE(review): execution continues after this alert - confirm intended */
+    if (!json_is_active())
+        foundVirus("InActive");
+
+    /* acquire the filetype object; return 1 on a non-positive handle */
+    objid = json_get_object("FileType", 8, 0);
+    if (objid <= 0) {
+        debug_print_str("json object has no filetype!", 28);
+        return 1;
+    }
+    type = json_get_type(objid);
+    if (type != JSON_TYPE_STRING) {
+        debug_print_str("json object filetype property is not string!", 44);
+        return 1;
+    }
+
+    /* acquire string length, note +1 is for the NUL terminator */
+    strlen = json_get_string_length(objid)+1;
+    /* clamp to the buffer size to prevent buffer overflow */
+    if (strlen > STR_MAXLEN)
+        strlen = STR_MAXLEN;
+    
+    /* acquire string data, note strlen includes the NUL terminator */
+    if (json_get_string(str, strlen, objid)) {
+        /* debug print str (with '\n' and prepended message) */
+        debug_print_str(str,strlen);
+
+        /* compare against each known filetype; lengths include the NUL terminator */
+        if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
+            foundVirus("CL_TYPE_MSEXE");
+            return 0;
+        }
+        if (strlen == 12 && !memcmp(str, "CL_TYPE_PDF", 12)) {
+            foundVirus("CL_TYPE_PDF");
+            return 0;
+        }
+        if (strlen == 19 && !memcmp(str, "CL_TYPE_OOXML_WORD", 19)) {
+            foundVirus("CL_TYPE_OOXML_WORD");
+            return 0;
+        }
+        if (strlen == 18 && !memcmp(str, "CL_TYPE_OOXML_PPT", 18)) {
+            foundVirus("CL_TYPE_OOXML_PPT");
+            return 0;
+        }
+        if (strlen == 17 && !memcmp(str, "CL_TYPE_OOXML_XL", 17)) {
+            foundVirus("CL_TYPE_OOXML_XL");
+            return 0;
+        }
+        if (strlen == 15 && !memcmp(str, "CL_TYPE_MSWORD", 15)) {
+            foundVirus("CL_TYPE_MSWORD");
+            return 0;
+        }
+        if (strlen == 14 && !memcmp(str, "CL_TYPE_MSPPT", 14)) {
+            foundVirus("CL_TYPE_MSPPT");
+            return 0;
+        }
+        if (strlen == 13 && !memcmp(str, "CL_TYPE_MSXL", 13)) {
+            foundVirus("CL_TYPE_MSXL");
+            return 0;
+        }
+        if (strlen == 15 && !memcmp(str, "CL_TYPE_MSOLE2", 15)) {
+            foundVirus("CL_TYPE_MSOLE2");
+            return 0;
+        }
+
+        foundVirus("CL_TYPE_UNKNOWN"); /* none of the listed types matched */
+        return 0;
+    }
+
+    return 0;
+}
--- a/examples/fileprop_analysis/old/notpdf_sample.c
+++ b/examples/fileprop_analysis/old/notpdf_sample.c
@ -0,0 +1,34 @@
+VIRUSNAME_PREFIX("SUBMIT.NotPDF")
+VIRUSNAMES("InActive", "Submit")
+
+/* Target type is 13, internal JSON properties */
+TARGET(13)
+
+/* JSON API call will require FUNC_LEVEL_098_5 = 78 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
+
+SIGNATURES_DECL_BEGIN
+DECLARE_SIGNATURE(sig1)
+DECLARE_SIGNATURE(sig2)
+SIGNATURES_DECL_END
+
+SIGNATURES_DEF_BEGIN
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
+/* this can be readjusted for specific filetypes */
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
+/* search '"RootFileType": "CL_TYPE_PDF"' */
+DEFINE_SIGNATURE(sig2, "22526f6f7446696c6554797065223a2022434c5f545950455f50444622")
+SIGNATURES_END
+
+bool logical_trigger(void)
+{
+    return matches(Signatures.sig1) && !matches(Signatures.sig2); /* CLAMJSON magic matched and no RootFileType CL_TYPE_PDF match */
+}
+
+#define STR_MAXLEN 256
+
+int entrypoint ()
+{
+    foundVirus("Submit"); /* unconditional alert; the PDF check lives in logical_trigger above */
+    return 0;
+}
--- a/examples/fileprop_analysis/old/onlype_sample.c
+++ b/examples/fileprop_analysis/old/onlype_sample.c
@ -0,0 +1,134 @@
+VIRUSNAME_PREFIX("SUBMIT.PE")
+VIRUSNAMES("Root", "Embedded", "RootEmbedded")
+
+/* Target type is 13, internal JSON properties */
+TARGET(13)
+
+/* JSON API call will require FUNC_LEVEL_098_5 = 79 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
+
+SIGNATURES_DECL_BEGIN
+DECLARE_SIGNATURE(sig1)
+DECLARE_SIGNATURE(sig2)
+SIGNATURES_DECL_END
+
+SIGNATURES_DEF_BEGIN
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
+/* this can be readjusted for specific filetypes */
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
+/* search '"FileType": "CL_TYPE_MSEXE"' */
+DEFINE_SIGNATURE(sig2, "2246696c6554797065223a2022434c5f545950455f4d5345584522")
+SIGNATURES_END
+
+bool logical_trigger(void)
+{
+    return matches(Signatures.sig1) && matches(Signatures.sig2); /* true when both sig1 (CLAMJSON magic at offset 0) and sig2 ("FileType": "CL_TYPE_MSEXE") matched */
+}
+
+#define STR_MAXLEN 256
+
+int entrypoint () /* reports "Root", "Embedded" or "RootEmbedded" depending on where a "CL_TYPE_MSEXE" FileType string is found; returns -1 on the early-exit paths below, 0 otherwise */
+{
+    int32_t i, root = 0, embedded = 0;
+    int32_t type, obj, strlen, objarr, objit, arrlen;
+    char str[STR_MAXLEN];
+
+    /* check whether json is active; return -1 when it is not */
+    if (!json_is_active()) {
+        return -1;
+    }
+
+    /* acquire the "FileType" object, looked up with objid 0 */
+    obj = json_get_object("FileType", 8, 0);
+    if (obj <= 0) return -1;
+
+    /* acquire and check type */
+    type = json_get_type(obj);
+    if (type == JSON_TYPE_STRING) {
+        /* acquire string length, note +1 is for the NUL terminator */
+        strlen = json_get_string_length(obj)+1;
+        /* prevent buffer overflow: never copy more than str can hold */
+        if (strlen > STR_MAXLEN)
+            strlen = STR_MAXLEN;
+        /* acquire string data, note strlen includes the NUL terminator */
+        if (json_get_string(str, strlen, obj)) {
+            /* debug print str (with '\n' and prepended message) */
+            debug_print_str(str,strlen);
+
+            /* exact match only: 13 characters of "CL_TYPE_MSEXE" plus the NUL terminator = 14 */
+            if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
+                //if (!strcmp(str, strlen, "CL_TYPE_MSEXE", strlen)) {
+                /* flag the match; the name is reported via foundVirus further down */
+                root = 1;
+            }
+        }
+    }
+
+    debug_print_uint(root);
+
+    /* acquire array of internal contained objects */
+    objarr = json_get_object("ContainedObjects", 16, 0);
+    if (objarr <= 0) {
+        if (root)
+            foundVirus("Root");
+        return 0;
+    }
+
+    type = json_get_type(objarr);
+    /* debug print uint (no '\n' or prepended message) */
+    debug_print_uint(type);
+
+    if (type != JSON_TYPE_ARRAY) {
+        return -1;
+    }
+
+    /* check array length for iteration over elements */
+    arrlen = json_get_array_length(objarr);
+    for (i = 0; i < arrlen; ++i) {
+        /* acquire json object @ idx i */
+        objit = json_get_array_idx(i, objarr);
+        if (objit <= 0) continue;
+
+        /* acquire FileType object of the array element @ idx i */
+        obj = json_get_object("FileType", 8, objit);
+        if (obj <= 0) continue;
+
+        /* acquire and check type */
+        type = json_get_type(obj);
+        if (type == JSON_TYPE_STRING) {
+            /* acquire string length, note +1 is for the NUL terminator */
+            strlen = json_get_string_length(obj)+1;
+            /* prevent buffer overflow: never copy more than str can hold */
+            if (strlen > STR_MAXLEN)
+                strlen = STR_MAXLEN;
+            /* acquire string data, note strlen includes the NUL terminator */
+            if (json_get_string(str, strlen, obj)) {
+                /* debug print str (with '\n' and prepended message) */
+                debug_print_str(str,strlen);
+
+                /* check the contained object's type (exact "CL_TYPE_MSEXE" plus NUL = 14 bytes) */
+                if (strlen == 14 && !memcmp(str, "CL_TYPE_MSEXE", 14)) {
+                    //if (!strcmp(str, strlen, "CL_TYPE_MSEXE", strlen)) {
+                    /* one embedded match is enough; stop scanning the array */
+                    embedded = 1;
+                    break;
+                }
+            }
+        }
+    }
+
+    debug_print_uint(root);
+    debug_print_uint(embedded);
+
+    if (root && embedded) {
+        foundVirus("RootEmbedded");
+    }
+    else if (root) {
+        foundVirus("Root");
+    }
+    else if (embedded) {
+        foundVirus("Embedded");
+    }
+
+    return 0;
+}
--- a/examples/fileprop_analysis/old/sandbox.c
+++ b/examples/fileprop_analysis/old/sandbox.c
@ -0,0 +1,28 @@
+VIRUSNAME_PREFIX("SUBMIT")
+VIRUSNAMES("Sandbox")
+
+/* Target type is 13, internal JSON properties */
+TARGET(13)
+
+/* JSON API call will require FUNC_LEVEL_098_5 = 79 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
+
+SIGNATURES_DECL_BEGIN
+DECLARE_SIGNATURE(sig1)
+SIGNATURES_DECL_END
+
+SIGNATURES_DEF_BEGIN
+/* search @offset 0 : '{ "Magic": "CLAMJSON' */
+/* this can be readjusted for specific filetypes */
+DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
+SIGNATURES_END
+
+bool logical_trigger(void)
+{
+    return matches(Signatures.sig1); /* true whenever sig1 (CLAMJSON magic at offset 0) matched */
+}
+
+int entrypoint ()
+{
+    return 0; /* no foundVirus call: this entrypoint reports nothing itself */
+}
--- a/examples/fileprop_analysis/onlype_sample.c
+++ b/examples/fileprop_analysis/onlype_sample.c
@ -1,29 +1,15 @@
 VIRUSNAME_PREFIX("SUBMIT.PE")
 VIRUSNAMES("Root", "Embedded", "RootEmbedded")

-/* Target type is 13, internal JSON properties */
-TARGET(13)
+/* Target type is 0, all relevant files */
+TARGET(0)
+
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
+PRECLASS_HOOK_DECLARE

 /* JSON API call will require FUNC_LEVEL_098_5 = 79 */
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
-
-SIGNATURES_DECL_BEGIN
-DECLARE_SIGNATURE(sig1)
-DECLARE_SIGNATURE(sig2)
-SIGNATURES_DECL_END
-
-SIGNATURES_DEF_BEGIN
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
-/* this can be readjusted for specific filetypes */
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
-/* search '"FileType": "CL_TYPE_MSEXE"' */
-DEFINE_SIGNATURE(sig2, "2246696c6554797065223a2022434c5f545950455f4d5345584522")
-SIGNATURES_END
-
-bool logical_trigger(void)
-{
-    return matches(Signatures.sig1) && matches(Signatures.sig2);
-}
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)

 #define STR_MAXLEN 256

--- a/examples/fileprop_analysis/sandbox.c
+++ b/examples/fileprop_analysis/sandbox.c
@ -1,26 +1,15 @@
 VIRUSNAME_PREFIX("SUBMIT")
 VIRUSNAMES("Sandbox")

-/* Target type is 13, internal JSON properties */
-TARGET(13)
+/* Target type is 0, all relevant files */
+TARGET(0)
+
+/* Declares to run bytecode only for preclassification (affecting only preclass files) */
+PRECLASS_HOOK_DECLARE

 /* JSON API call will require FUNC_LEVEL_098_5 = 79 */
-FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_5)
-
-SIGNATURES_DECL_BEGIN
-DECLARE_SIGNATURE(sig1)
-SIGNATURES_DECL_END
-
-SIGNATURES_DEF_BEGIN
-/* search @offset 0 : '{ "Magic": "CLAMJSON' */
-/* this can be readjusted for specific filetypes */
-DEFINE_SIGNATURE(sig1, "0:7b20224d61676963223a2022434c414d4a534f4e")
-SIGNATURES_END
-
-bool logical_trigger(void)
-{
-    return matches(Signatures.sig1);
-}
+/* PRECLASS_HOOK_DECLARE will require FUNC_LEVEL_098_7 = 80 */
+FUNCTIONALITY_LEVEL_MIN(FUNC_LEVEL_098_7)

 int entrypoint ()
 {
--- a/libclamav/Makefile.am
+++ b/libclamav/Makefile.am
@ -462,7 +462,11 @@ libclamav_la_SOURCES = \
 	matcher-pcre.c \
 	matcher-pcre.h \
 	regex_pcre.c \
-	regex_pcre.h
+	regex_pcre.h \
+	msxml.c \
+	msxml.h \
+	msxml_parser.c \
+	msxml_parser.h

 libclamav_la_SOURCES += bignum.h\
 	bignum_fast.h\
--- a/libclamav/Makefile.in
+++ b/libclamav/Makefile.in
@ -243,7 +243,8 @@ am_libclamav_la_OBJECTS = libclamav_la-matcher-ac.lo \
 	libclamav_la-yara_hash.lo libclamav_la-yara_grammar.lo \
 	libclamav_la-yara_lexer.lo libclamav_la-yara_parser.lo \
 	libclamav_la-msdoc.lo libclamav_la-matcher-pcre.lo \
-	libclamav_la-regex_pcre.lo libclamav_la-fp_add.lo \
+	libclamav_la-regex_pcre.lo libclamav_la-msxml.lo \
+	libclamav_la-msxml_parser.lo libclamav_la-fp_add.lo \
 	libclamav_la-fp_add_d.lo libclamav_la-fp_addmod.lo \
 	libclamav_la-fp_cmp.lo libclamav_la-fp_cmp_d.lo \
 	libclamav_la-fp_cmp_mag.lo libclamav_la-fp_sub.lo \
@ -839,10 +840,10 @@ libclamav_la_SOURCES = matcher-ac.c matcher-ac.h matcher-bm.c \
 	yara_hash.c yara_hash.h yara_grammar.y yara_lexer.l \
 	yara_lexer.h yara_parser.c yara_parser.h yara_clam.h msdoc.c \
 	msdoc.h matcher-pcre.c matcher-pcre.h regex_pcre.c \
-	regex_pcre.h bignum.h bignum_fast.h \
-	tomsfastmath/addsub/fp_add.c tomsfastmath/addsub/fp_add_d.c \
-	tomsfastmath/addsub/fp_addmod.c tomsfastmath/addsub/fp_cmp.c \
-	tomsfastmath/addsub/fp_cmp_d.c \
+	regex_pcre.h msxml.c msxml.h msxml_parser.c msxml_parser.h \
+	bignum.h bignum_fast.h tomsfastmath/addsub/fp_add.c \
+	tomsfastmath/addsub/fp_add_d.c tomsfastmath/addsub/fp_addmod.c \
+	tomsfastmath/addsub/fp_cmp.c tomsfastmath/addsub/fp_cmp_d.c \
 	tomsfastmath/addsub/fp_cmp_mag.c tomsfastmath/addsub/fp_sub.c \
 	tomsfastmath/addsub/fp_sub_d.c tomsfastmath/addsub/fp_submod.c \
 	tomsfastmath/addsub/s_fp_add.c tomsfastmath/addsub/s_fp_sub.c \
@ -1196,6 +1197,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msdoc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msexpand.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-mspack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msxml.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-msxml_parser.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-nulsft.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-ole2_extract.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libclamav_la-ooxml.Plo@am__quote@
@ -2311,6 +2314,20 @@ libclamav_la-regex_pcre.lo: regex_pcre.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-regex_pcre.lo `test -f 'regex_pcre.c' || echo '$(srcdir)/'`regex_pcre.c

+libclamav_la-msxml.lo: msxml.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-msxml.lo -MD -MP -MF $(DEPDIR)/libclamav_la-msxml.Tpo -c -o libclamav_la-msxml.lo `test -f 'msxml.c' || echo '$(srcdir)/'`msxml.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-msxml.Tpo $(DEPDIR)/libclamav_la-msxml.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='msxml.c' object='libclamav_la-msxml.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msxml.lo `test -f 'msxml.c' || echo '$(srcdir)/'`msxml.c
+
+libclamav_la-msxml_parser.lo: msxml_parser.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-msxml_parser.lo -MD -MP -MF $(DEPDIR)/libclamav_la-msxml_parser.Tpo -c -o libclamav_la-msxml_parser.lo `test -f 'msxml_parser.c' || echo '$(srcdir)/'`msxml_parser.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-msxml_parser.Tpo $(DEPDIR)/libclamav_la-msxml_parser.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='msxml_parser.c' object='libclamav_la-msxml_parser.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -c -o libclamav_la-msxml_parser.lo `test -f 'msxml_parser.c' || echo '$(srcdir)/'`msxml_parser.c
+
 libclamav_la-fp_add.lo: tomsfastmath/addsub/fp_add.c
@am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libclamav_la_CFLAGS) $(CFLAGS) -MT libclamav_la-fp_add.lo -MD -MP -MF $(DEPDIR)/libclamav_la-fp_add.Tpo -c -o libclamav_la-fp_add.lo `test -f 'tomsfastmath/addsub/fp_add.c' || echo '$(srcdir)/'`tomsfastmath/addsub/fp_add.c
@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libclamav_la-fp_add.Tpo $(DEPDIR)/libclamav_la-fp_add.Plo
--- a/libclamav/bytecode.c
+++ b/libclamav/bytecode.c
@ -2970,7 +2970,13 @@ void cli_bytecode_describe(const struct cli_bc *bc)
 	    puts("logical only");
 	    break;
 	case BC_PE_UNPACKER:
-	    puts("PE hook");
+	    puts("PE unpacker hook");
+	    break;
+    case BC_PE_ALL:
+        puts("all PE hook");
+        break;
+    case BC_PRECLASS:
+        puts("preclass hook");
 	    break;
 	default:
 	    printf("Unknown (type %u)", bc->kind);
@ -3007,6 +3013,12 @@ void cli_bytecode_describe(const struct cli_bc *bc)
 	    else
 		puts("all PE files!");
 	    break;
+	case BC_PRECLASS:
+	    if (bc->lsig)
+		puts("PRECLASS files matching logical signature");
+	    else
+		puts("all PRECLASS files!");
+	    break;
 	default:
 	    puts("N/A (unknown type)\n");
 	    break;
--- a/libclamav/bytecode_api.h
+++ b/libclamav/bytecode_api.h
@ -61,6 +61,9 @@ enum BytecodeKind {
    /** specifies a PE hook, executes at a predetermined point in PE parsing for PE files,
      * both packed and unpacked files */
    BC_PE_ALL,
+    /** specifies a PRECLASS hook, executes at the end of file property collection and
+      * operates on the original file targeted for property collection */
+    BC_PRECLASS,
    _BC_LAST_HOOK
 };

@ -97,12 +100,13 @@ enum FunctionalityLevels {
    FUNC_LEVEL_097_6     = 67, /**< LibClamAV release 0.97.6 */
    FUNC_LEVEL_097_7     = 68, /**< LibClamAV release 0.97.7 */
    FUNC_LEVEL_097_8     = 69, /**< LibClamAV release 0.97.8 */
-    FUNC_LEVEL_098_1     = 76, /**< LibClamAV release 0.98.2 */ /*last syncing to clamav*/
+    FUNC_LEVEL_098_1     = 76, /**< LibClamAV release 0.98.1 */ /*last syncing to clamav*/
    FUNC_LEVEL_098_2     = 77, /**< LibClamAV release 0.98.2 */
    FUNC_LEVEL_098_3     = 77, /**< LibClamAV release 0.98.3 */
    FUNC_LEVEL_098_4     = 77, /**< LibClamAV release 0.98.4 */
    FUNC_LEVEL_098_5     = 79, /**< LibClamAV release 0.98.5: JSON reading API requires this minimum level */
    FUNC_LEVEL_098_6     = 79, /**< LibClamAV release 0.98.6 */
+    FUNC_LEVEL_098_7     = 80, /**< LibClamAV release 0.98.7: BC_PRECLASS bytecodes require minimum level */
    FUNC_LEVEL_100       = 100 /*future release candidate*/
 };

@ -111,7 +115,7 @@ enum FunctionalityLevels {
 * Phase of PDF parsing used for PDF Hooks
 */
 enum pdf_phase {
-    PDF_PHASE_NONE,     /* not a PDF */
+    PDF_PHASE_NONE,     /**< not a PDF */
    PDF_PHASE_PARSED,   /**< after parsing a PDF, object flags can be set etc. */
    PDF_PHASE_POSTDUMP, /**< after an obj was dumped and scanned */
    PDF_PHASE_END,      /**< after the pdf scan finished */
@ -1123,14 +1127,14 @@ int32_t get_file_reliability(void);
 /* ----------------- END 0.96.4 APIs ---------------------------------- */
 /* ----------------- BEGIN 0.98.4 APIs -------------------------------- */
 /* ----------------- JSON Parsing APIs -------------------------------- */
-/*
+/**
 \group_json
 * @return 0 - json is disabled or option not specified
 * @return 1 - json is active and properties are available
 */
 int32_t json_is_active(void);

-/*
+/**
 \group_json
 * @return objid of json object with specified name
 * @return 0 if json object of specified name cannot be found
@ -1142,7 +1146,7 @@ int32_t json_is_active(void);
 */
 int32_t json_get_object(const int8_t* name, int32_t name_len, int32_t objid);

-/*
+/**
 \group_json
 * @return type (json_type) of json object specified
 * @return -1 if type unknown or invalid id
@ -1150,7 +1154,7 @@ int32_t json_get_object(const int8_t* name, int32_t name_len, int32_t objid);
 */
 int32_t json_get_type(int32_t objid);

-/*
+/**
 \group_json
 * @return number of elements in the json array of objid
 * @return -1 if an error has occurred
@ -1159,7 +1163,7 @@ int32_t json_get_type(int32_t objid);
 */
 int32_t json_get_array_length(int32_t objid);

-/*
+/**
 \group_json
 * @return objid of json object at idx of json array of objid
 * @return 0 if invalid idx
@ -1170,7 +1174,7 @@ int32_t json_get_array_length(int32_t objid);
 */
 int32_t json_get_array_idx(int32_t idx, int32_t objid);

-/*
+/**
 \group_json
 * @return length of json string of objid, not including terminating null-character
 * @return -1 if an error has occurred
@ -1179,7 +1183,7 @@ int32_t json_get_array_idx(int32_t idx, int32_t objid);
 */
 int32_t json_get_string_length(int32_t objid);

-/*
+/**
 \group_json
 * @return number of characters transferred (capped by str_len), 
 *         including terminating null-character
@ -1192,20 +1196,21 @@ int32_t json_get_string_length(int32_t objid);
 */
 int32_t json_get_string(int8_t* str, int32_t str_len, int32_t objid);

-/*
+/**
 \group_json
 * @return boolean value of queried objid; will force other types to boolean
 * @param[in] objid - id value of json object to query
 */
 int32_t json_get_boolean(int32_t objid);

-/*
+/**
 \group_json
 * @return integer value of queried objid; will force other types to integer
 * @param[in] objid - id value of json object to query
 */
 int32_t json_get_int(int32_t objid);

+//int64_t json_get_int64(int32_t objid);
 /* bytecode does not support double type */
 //double json_get_double(int32_t objid);

--- a/libclamav/clamav.h
+++ b/libclamav/clamav.h
@ -402,7 +402,7 @@ extern void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback

 /* File properties callback */
 typedef int (*clcb_file_props)(const char *j_propstr, int rc, void *cbdata);
-extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback, void * cbdata);
+extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback);

 /* Statistics/intelligence gathering callbacks */
 extern void cl_engine_set_stats_set_cbdata(struct cl_engine *engine, void *cbdata);
--- a/libclamav/conv.c
+++ b/libclamav/conv.c
@ -115,7 +115,13 @@ char *cl_base64_encode(void *data, size_t len)
    size_t elen;

    b64 = BIO_new(BIO_f_base64());
+    if (!(b64))
+        return NULL;
    bio = BIO_new(BIO_s_mem());
+    if (!(bio)) {
+        BIO_free(b64);
+        return NULL;
+    }

    bio = BIO_push(b64, bio);
    BIO_write(bio, data, len);
--- a/libclamav/filetypes.c
+++ b/libclamav/filetypes.c
@ -119,6 +119,8 @@ static const struct ftmap_s {
    { "CL_TYPE_OOXML_XL",	CL_TYPE_OOXML_XL     	},
    { "CL_TYPE_INTERNAL",	CL_TYPE_INTERNAL     	},
    { "CL_TYPE_XDP",        CL_TYPE_XDP             },
+    { "CL_TYPE_XML_WORD",   CL_TYPE_XML_WORD        },
+    { "CL_TYPE_XML_XL",     CL_TYPE_XML_XL          },
    { NULL,			CL_TYPE_IGNORED		}
 };

--- a/libclamav/filetypes.h
+++ b/libclamav/filetypes.h
@ -108,6 +108,8 @@ typedef enum {
    CL_TYPE_GPT,
    CL_TYPE_APM,
    CL_TYPE_XDP,
+    CL_TYPE_XML_WORD,
+    CL_TYPE_XML_XL,
    CL_TYPE_IGNORED /* please don't add anything below */
 } cli_file_t;

--- a/libclamav/filetypes_int.h
+++ b/libclamav/filetypes_int.h
@ -160,7 +160,8 @@ static const char *ftypes_int[] = {
  "0:0:377f0683002de218:SQLite WAL:CL_TYPE_ANY:CL_TYPE_IGNORED",
  "0:0:53514c69746520666f726d6174203300:SQLite database:CL_TYPE_ANY:CL_TYPE_IGNORED",
  "0:0:d9d505f920a163d7:SQLite journal:CL_TYPE_ANY:CL_TYPE_IGNORED",
-  "0:0:435753:SWF (compressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
+  "0:0:5a5753:SWF (LZMA compressed):CL_TYPE_ANY:CL_TYPE_SWF:81",
+  "0:0:435753:SWF (zlib compressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
  "0:0:465753:SWF (uncompressed):CL_TYPE_ANY:CL_TYPE_SWF:71",
  "0:0:4d53434600000000:MS CAB:CL_TYPE_ANY:CL_TYPE_MSCAB",
  "1:*:4d53434600000000:CAB-SFX:CL_TYPE_ANY:CL_TYPE_CABSFX",
@ -182,6 +183,10 @@ static const char *ftypes_int[] = {
  "1:0:4552{510}504D0000:Disk Image - Apple Partition Map:CL_TYPE_ANY:CL_TYPE_APM:77",
  "0:0:7b20224d61676963223a2022434c414d4a534f4e763022:Internal properties:CL_TYPE_ANY:CL_TYPE_INTERNAL:78",
  "1:*:3c7864703a786470:Adobe XDP - Embedded PDF:CL_TYPE_ANY:CL_TYPE_XDP:79",
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c776f7264446f63756d656e74:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c??3a776f7264446f63756d656e74:Microsoft Word 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_WORD:80",
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
+  "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c??3a576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
  NULL
 };

--- a/libclamav/hostid.c
+++ b/libclamav/hostid.c
@ -47,7 +47,7 @@
 #include <ifaddrs.h>
 #endif

-#if defined(SIOCGIFHWADDR)
+#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
 #if defined(_AIX)
 #include <sys/ndd_var.h>
 #include <sys/kinfo.h>
@ -116,7 +116,7 @@ struct device *get_devices(void)
    uint8_t *mac;
    int sock;

-#if defined(SIOCGIFHWADDR)
+#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
    struct ifreq ifr;
 #else
    struct sockaddr_dl *sdl;
@ -155,7 +155,7 @@ struct device *get_devices(void)
         * Instead, Linux uses its own ioctl. This code only runs if we're not Linux,
         * Windows, or FreeBSD.
         */
-#if !defined(SIOCGIFHWADDR)
+#if !defined(SIOCGIFHWADDR) || defined(__GNU__)
        for (i=0; i < ndevices; i++) {
            if (!(strcmp(devices[i].name, addr->ifa_name))) {
                sdl = (struct sockaddr_dl *)(addr->ifa_addr);
@ -180,7 +180,7 @@ struct device *get_devices(void)
    }

    /* This is the Linux version of getting the MAC addresses */
-#if defined(SIOCGIFHWADDR)
+#if defined(SIOCGIFHWADDR) && !defined(__GNU__)
    for (i=0; i < ndevices; i++) {
        if (!(devices[i].name))
            continue;
--- a/libclamav/iso9660.c
+++ b/libclamav/iso9660.c
@ -118,6 +118,7 @@ static char *iso_string(iso9660_t *iso, const void *src, unsigned int len) {
 static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
    cli_ctx *ctx = iso->ctx;
    int ret = CL_CLEAN;
+    int viruses_found = 0;

    if(len < 34) {
 	cli_dbgmsg("iso_parse_dir: Directory too small, skipping\n");
@ -182,10 +183,13 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
 	    filesz = cli_readint32(dir+10);

 	    cli_dbgmsg("iso_parse_dir: %s '%s': off %x - size %x - flags %x - unit size %x - gap size %x - volume %u\n", (dir[25] & 2) ? "Directory" : "File", iso->buf, fileoff, filesz, dir[25], dir[26], dir[27], cli_readint32(&dir[28]) & 0xffff);
-	    if(cli_matchmeta(ctx, iso->buf, filesz, filesz, 0, 0, 0, NULL) == CL_VIRUS) {
-		ret = CL_VIRUS;
-		break;
-	    }
+            ret = cli_matchmeta(ctx, iso->buf, filesz, filesz, 0, 0, 0, NULL);
+            if (ret == CL_VIRUS) {
+                viruses_found = 1;
+                if (!SCAN_ALL)
+                    break;
+                ret = CL_CLEAN;
+            }

 	    if(dir[26] || dir[27])
 		cli_dbgmsg("iso_parse_dir: Skipping interleaved file\n");
@ -199,6 +203,12 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {
 		    else
 			ret = iso_scan_file(iso, fileoff, filesz);
 		}
+                if (ret == CL_VIRUS) {
+                    viruses_found = 1;
+                    if (!SCAN_ALL)
+                        break;
+                    ret = CL_CLEAN;
+                }
 	    }
 	    dirsz -= entrysz;
 	    dir += entrysz;
@ -206,6 +216,8 @@ static int iso_parse_dir(iso9660_t *iso, unsigned int block, unsigned int len) {

 	fmap_unneed_ptr(*ctx->fmap, dir_orig, iso->blocksz);
    }
+    if (viruses_found == 1)
+        return CL_VIRUS;
    return ret;
 }

--- a/libclamav/json_api.c
+++ b/libclamav/json_api.c
@ -46,6 +46,21 @@ int cli_json_timeout_cycle_check(cli_ctx *ctx, int *toval)
    return CL_SUCCESS;
 }

+int cli_json_parse_error(json_object *root, const char *errstr) /* records errstr in the "ParseErrors" array obtained from root */
+{
+    json_object *perr;
+
+    if (!root)
+        return CL_SUCCESS; /* nothing to record into; currently treated as success (CL_ENULLARG?) */
+
+    perr = cli_jsonarray(root, "ParseErrors"); /* array object for key "ParseErrors" under root */
+    if (perr == NULL) {
+        return CL_EMEM; /* a NULL result from cli_jsonarray is reported as CL_EMEM */
+    }
+
+    return cli_jsonstr(perr, NULL, errstr); /* NULL key is passed; the status of cli_jsonstr is returned unchanged */
+}
+
 int cli_jsonnull(json_object *obj, const char* key)
 {
    json_type objty;
--- a/libclamav/json_api.h
+++ b/libclamav/json_api.h
@ -37,6 +37,7 @@
 #define JSON_TIMEOUT_SKIP_CYCLES 3

 int cli_json_timeout_cycle_check(cli_ctx *ctx, int *toval);
+int cli_json_parse_error(json_object *root, const char *errstr);

 int cli_jsonnull(json_object *obj, const char* key);
 int cli_jsonstr(json_object *obj, const char* key, const char* s);
--- a/libclamav/matcher-bm.c
+++ b/libclamav/matcher-bm.c
@ -245,7 +245,7 @@ void cli_bm_free(struct cli_matcher *root)
    }
 }

-int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset)
+int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, cli_ctx *ctx)
 {
 	uint32_t i, j, off, off_min, off_max;
 	uint8_t found, pchain, shift;
@ -253,7 +253,7 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
 	struct cli_bm_patt *p;
 	const unsigned char *bp, *pt;
 	unsigned char prefix;
-        int ret;
+        int ret, viruses_found = 0;

    if(!root || !root->bm_shift)
 	return CL_CLEAN;
@ -285,8 +285,11 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
 		if(offdata) {
 		    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 		    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
-		    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
+		    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos]) {
+			if (viruses_found)
+			    return CL_VIRUS;
 			return CL_CLEAN;
+		    }
 		    i += offdata->offtab[offdata->pos] - off;
 		} else {
 		    i++;
@ -377,12 +380,18 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
 		    }
 		    if(virname) {
 			*virname = p->virname;
-			if(viroffset)
-			    *viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
+			if(ctx != NULL && SCAN_ALL) {
+			    cli_append_virus(ctx, *virname);
+			    //*viroffset = offset + i + j - BM_MIN_LENGTH + BM_BLOCK_SIZE;
+			}
 		    }
 		    if(patt)
 			*patt = p;
-		    return CL_VIRUS;
+
+		    viruses_found = 1;
+
+		    if(ctx != NULL && !SCAN_ALL)
+			return CL_VIRUS;
 		}
 		p = p->next;
 	    }
@ -392,8 +401,11 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v
 	if(offdata) {
 	    off = offset + i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
 	    for(; offdata->pos < offdata->cnt && off >= offdata->offtab[offdata->pos]; offdata->pos++);
-	    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos])
+	    if(offdata->pos == offdata->cnt || off >= offdata->offtab[offdata->pos]) {
+		if (viruses_found)
+		    return CL_VIRUS;
 		return CL_CLEAN;
+	    }
 	    i += offdata->offtab[offdata->pos] - off;
 	} else {
 	    i += shift;
@ -401,5 +413,7 @@ int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **v

    }

+    if (viruses_found)
+	return CL_VIRUS;
    return CL_CLEAN;
 }
--- a/libclamav/matcher-bm.h
+++ b/libclamav/matcher-bm.h
@ -25,6 +25,7 @@
 #include "filetypes.h"
 #include "cltypes.h"
 #include "fmap.h"
+#include "others.h"

 #define BM_BOUNDARY_EOL	1

@ -47,7 +48,7 @@ int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern, const
 int cli_bm_init(struct cli_matcher *root);
 int cli_bm_initoff(const struct cli_matcher *root, struct cli_bm_off *data, const struct cli_target_info *info);
 void cli_bm_freeoff(struct cli_bm_off *data);
-int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, uint32_t *viroffset);
+int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_bm_patt **patt, const struct cli_matcher *root, uint32_t offset, const struct cli_target_info *info, struct cli_bm_off *offdata, cli_ctx *ctx);
 void cli_bm_free(struct cli_matcher *root);

 #endif
--- a/libclamav/matcher.c
+++ b/libclamav/matcher.c
@ -104,7 +104,6 @@ static inline int matcher_run(const struct cli_matcher *root,
 			      fmap_t *map,
 			      struct cli_bm_off *offdata,
 			      struct cli_pcre_off *poffdata,
-			      uint32_t *viroffset,
 			      cli_ctx *ctx)
 {
    int ret, tmp;
@ -143,17 +142,20 @@ static inline int matcher_run(const struct cli_matcher *root,
 	    /* Don't use prefiltering for BM offset mode, since BM keeps tracks
 	     * of offsets itself, and doesn't work if we skip chunks of input
 	     * data */
-	    ret = cli_bm_scanbuff(orig_buffer, orig_length, virname, NULL, root, orig_offset, tinfo, offdata, viroffset);
+	    ret = cli_bm_scanbuff(orig_buffer, orig_length, virname, NULL, root, orig_offset, tinfo, offdata, ctx);
 	} else {
-	    ret = cli_bm_scanbuff(buffer, length, virname, NULL, root, offset, tinfo, offdata, viroffset);
+	    ret = cli_bm_scanbuff(buffer, length, virname, NULL, root, offset, tinfo, offdata, ctx);
 	}
-	if (ret == CL_VIRUS) {
-	    if (ctx) {
+	if (ret != CL_CLEAN) {
+	    if (ret != CL_VIRUS)
+		return ret;
+
+	    /* else (ret == CL_VIRUS) */
+	    if (SCAN_ALL)
+		viruses_found = 1;
+	    else {
 		cli_append_virus(ctx, *virname);
-		if (SCAN_ALL)
-		    viruses_found++;
-		else
-		    return ret;
+		return ret;
 	    }
 	}
    }
@ -253,7 +255,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
 	if(!acdata && (ret = cli_ac_initdata(&mdata, troot->ac_partsigs, troot->ac_lsigs, troot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
 	    return ret;

-	ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
+	ret = matcher_run(troot, buffer, length, &virname, acdata ? (acdata[0]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, ctx);

 	if(!acdata)
 	    cli_ac_freedata(&mdata);
@ -273,7 +275,7 @@ int cli_scanbuff(const unsigned char *buffer, uint32_t length, uint32_t offset,
    if(!acdata && (ret = cli_ac_initdata(&mdata, groot->ac_partsigs, groot->ac_lsigs, groot->ac_reloff_num, CLI_DEFAULT_AC_TRACKLEN)))
 	return ret;

-    ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, NULL, ctx);
+    ret = matcher_run(groot, buffer, length, &virname, acdata ? (acdata[1]): (&mdata), offset, NULL, ftype, NULL, AC_SCAN_VIR, PCRE_SCAN_BUFF, NULL, *ctx->fmap, NULL, NULL, ctx);

    if(!acdata)
 	cli_ac_freedata(&mdata);
@ -813,7 +815,6 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
    fmap_t *map = *ctx->fmap;
    struct cli_matcher *hdb, *fp;
    const char *virname = NULL;
-    uint32_t viroffset = 0;
    uint32_t viruses_found = 0;
    void *md5ctx, *sha1ctx, *sha256ctx;

@ -993,8 +994,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli

        if(troot) {
                virname = NULL;
-                viroffset = 0;
-                ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, &viroffset, ctx);
+                ret = matcher_run(troot, buff, bytes, &virname, &tdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, bm_offmode ? &toff : NULL, &tpoff, ctx);

            if (virname) {
                /* virname already appended by matcher_run */
@ -1024,8 +1024,7 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli

        if(!ftonly) {
            virname = NULL;
-            viroffset = 0;
-            ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, &viroffset, ctx);
+            ret = matcher_run(groot, buff, bytes, &virname, &gdata, offset, &info, ftype, ftoffset, acmode, PCRE_SCAN_FMAP, acres, map, NULL, &gpoff, ctx);

            if (virname) {
                /* virname already appended by matcher_run */
@ -1054,7 +1053,9 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
                    type = ret;
            }

-            if(hdb) {
+            /* if (bytes <= (maxpatlen * (offset!=0))), it means the last window finished the file hashing *
+             *   since the last window is responsible for adding intersection between windows (maxpatlen)  */
+            if(hdb && (bytes > (maxpatlen * (offset!=0)))) {
                const void *data = buff + maxpatlen * (offset!=0);
                uint32_t data_len = bytes - maxpatlen * (offset!=0);

@ -1067,11 +1068,6 @@ int cli_fmap_scandesc(cli_ctx *ctx, cli_file_t ftype, uint8_t ftonly, struct cli
            }
        }

-        if(SCAN_ALL && viroffset) {
-            offset = viroffset;
-            continue;
-        }
-
        if(bytes < SCANBUFF)
            break;

--- a/libclamav/msdoc.c
+++ b/libclamav/msdoc.c
@ -3,7 +3,7 @@
 * 
 * Copyright (C) 2007-2013 Sourcefire, Inc.
 * 
- * Authors: Trog
+ * Authors: Kevin Lin
 * 
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License version 2 as published by the
--- a/libclamav/msdoc.h
+++ b/libclamav/msdoc.h
@ -3,7 +3,7 @@
 *
 *  Copyright (C) 2007-2008 Sourcefire, Inc.
 *
- *  Authors: Trog
+ *  Authors: Kevin Lin
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License version 2 as
--- a/libclamav/msxml.c
+++ b/libclamav/msxml.c
@ -0,0 +1,285 @@
+/*
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
+ * 
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
+ * 
+ * Authors: Kevin Lin
+ * 
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ * 
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "conv.h"
+#include "json_api.h"
+#include "msxml.h"
+#include "msxml_parser.h"
+
+#if HAVE_LIBXML2
+#ifdef _WIN32
+#ifndef LIBXML_WRITER_ENABLED
+#define LIBXML_WRITER_ENABLED 1
+#endif
+#endif
+#include <libxml/xmlreader.h>
+
+#define MSXML_VERBIOSE 0
+#if MSXML_VERBIOSE
+#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
+#else
+#define cli_msxmlmsg(...)
+#endif
+
+#define MSXML_READBUFF SCANBUFF
+
+/*
+ * Element table passed by cli_scanmsxml() to cli_msxml_parse_document().
+ * Each entry is { key, name, type }:
+ *   key  - element local name, matched case-insensitively by the parser
+ *   name - name of the JSON object generated for a matching element
+ *   type - bitwise OR of the MSXML_* flags declared in msxml_parser.h
+ */
+static const struct key_entry msxml_keys[] = {
+    { "worddocument",       "WordDocument",       MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
+    { "workbook",           "Workbook",           MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
+
+    { "bindata",            "BinaryData",         MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT },
+    { "documentproperties", "DocumentProperties", MSXML_JSON_ROOT },
+    { "author",             "Author",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "lastauthor",         "LastAuthor",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "revision",           "Revision",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "totaltime",          "TotalTime",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "created",            "Created",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "lastsaved",          "LastSaved",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "pages",              "Pages",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "words",              "Words",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "characters",         "Characters",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "lines",              "Lines",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "paragraph",          "Paragraph",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "version",            "Version",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+
+    { "allowpng",           "AllowPNG",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+
+    /* elements whose whole subtree is skipped by the parser */
+    { "fonts",              "Fonts",              MSXML_IGNORE_ELEM },
+    { "styles",             "Styles",             MSXML_IGNORE_ELEM }
+};
+/* number of entries in msxml_keys */
+static size_t num_msxml_keys = sizeof(msxml_keys) / sizeof(struct key_entry);
+
+/*
+ * States of the numeric character entity tracker run by msxml_read_cb()
+ * over the bytes it copies ("&#123;" or "&#x1F;" forms).
+ */
+enum msxml_state {
+    MSXML_STATE_NORMAL = 0,      /* not inside an entity; '&' moves to ENTITY_START_1 */
+    MSXML_STATE_ENTITY_START_1,  /* saw '&'; '#' moves to ENTITY_START_2, anything else resets */
+    MSXML_STATE_ENTITY_START_2,  /* saw "&#"; 'x' selects HEX, a decimal digit selects DEC */
+    MSXML_STATE_ENTITY_HEX,      /* consuming hex digits; any other byte moves to ENTITY_CLOSE */
+    MSXML_STATE_ENTITY_DEC,      /* consuming decimal digits; any other byte moves to ENTITY_CLOSE */
+    MSXML_STATE_ENTITY_CLOSE,    /* transient: the byte after the digits is checked for ';' */
+    MSXML_STATE_ENTITY_NONE      /* NOTE(review): not referenced by the code in this file */
+};
+
+/*
+ * Context handed to msxml_read_cb() through xmlReaderForIO(); it tracks the
+ * fmap window currently being copied and the entity tracker state.
+ */
+struct msxml_cbdata {
+    enum msxml_state state;       /* entity tracker state, kept across callback invocations */
+    fmap_t *map;                  /* map being read */
+    const unsigned char *window;  /* current window into map, NULL before the first read and at EOF */
+    off_t winpos, mappos;         /* winpos: read offset inside window; mappos: map offset of window start */
+    size_t winsize;               /* number of bytes in the current window */
+};
+
+/*
+ * Move cbdata to the fmap window that follows the current one.
+ *
+ * The next window starts at mappos + winsize and holds at most
+ * MSXML_READBUFF bytes.  On success the window pointer, winpos, mappos and
+ * winsize fields are updated; at end of map they are reset.
+ *
+ * Returns the size of the new window in bytes, 0 when the end of the map has
+ * been reached, or -1 when the window cannot be acquired.  The return type
+ * is signed because callers store the result in an int and test for < 0.
+ */
+static inline int msxml_read_cb_new_window(struct msxml_cbdata *cbdata)
+{
+    const unsigned char *new_window = NULL;
+    off_t new_mappos;
+    size_t bytes;
+
+    /* a previous call already consumed the whole map */
+    if (cbdata->mappos == cbdata->map->len) {
+        cli_msxmlmsg("msxml_read_cb: fmap REALLY EOF\n");
+        return 0;
+    }
+
+    new_mappos = cbdata->mappos + cbdata->winsize;
+    bytes = MIN(cbdata->map->len - new_mappos, MSXML_READBUFF);
+    if (!bytes) {
+        /* nothing left: park the state at EOF so later calls hit the check above */
+        cbdata->window = NULL;
+        cbdata->winpos = 0;
+        cbdata->mappos = cbdata->map->len;
+        cbdata->winsize = 0;
+
+        cli_msxmlmsg("msxml_read_cb: fmap EOF\n");
+        return 0;
+    }
+
+    new_window = fmap_need_off_once(cbdata->map, new_mappos, bytes);
+    if (!new_window) {
+        cli_errmsg("msxml_read_cb: cannot acquire new window for fmap\n");
+        return -1;
+    }
+
+    cbdata->window = new_window;
+    cbdata->winpos = 0;
+    cbdata->mappos = new_mappos;
+    cbdata->winsize = bytes;
+
+    cli_msxmlmsg("msxml_read_cb: acquired new window @ [%llu(+%llu)(max:%llu)]\n",
+                 (long long unsigned)cbdata->mappos, (long long unsigned)(cbdata->mappos + cbdata->winsize),
+                 (long long unsigned)cbdata->map->len);
+
+    /* bytes is bounded by MSXML_READBUFF */
+    return (int)bytes;
+}
+
+/*
+ * Read callback for xmlReaderForIO(): copies up to len bytes of the fmap
+ * into buffer, window by window, while tracking numeric character entities.
+ *
+ * When the digits of "&#..." / "&#x..." are followed by a byte other than
+ * ';', a ';' is inserted into the output ahead of that byte so the entity is
+ * terminated.  The tracker state lives in cbdata and survives across calls
+ * and window changes.
+ *
+ * ctx    - struct msxml_cbdata set up by the caller
+ * buffer - destination supplied by the XML reader
+ * len    - capacity of buffer in bytes
+ *
+ * Returns the number of bytes written, 0 at end of input (or when len is not
+ * positive), or a negative value when a window cannot be acquired.
+ */
+int msxml_read_cb(void *ctx, char *buffer, int len)
+{
+    struct msxml_cbdata *cbdata = (struct msxml_cbdata *)ctx;
+    size_t wbytes, rbytes, want;
+    int winret;
+
+    cli_msxmlmsg("msxml_read_cb called\n");
+
+    /* nothing can be written; this also keeps a negative len from wrapping
+     * to a huge value in the size_t comparisons below */
+    if (len <= 0)
+        return 0;
+    want = (size_t)len;
+
+    /* initial iteration */
+    if (!cbdata->window) {
+        if ((winret = msxml_read_cb_new_window(cbdata)) <= 0)
+            return winret;
+    }
+
+    cli_msxmlmsg("msxml_read_cb: requested %d bytes from offset %llu\n", len, (long long unsigned)(cbdata->mappos+cbdata->winpos));
+
+    wbytes = 0;
+    rbytes = cbdata->winsize - cbdata->winpos;
+
+    /* copying loop with preprocessing */
+    while (wbytes < want) {
+        const unsigned char *read_from;
+        char *write_to = buffer + wbytes;
+        enum msxml_state *state;
+#if MSXML_VERBIOSE
+        size_t written;
+#endif
+
+        /* current window exhausted: fetch the next one or report what we have */
+        if (!rbytes) {
+            if ((winret = msxml_read_cb_new_window(cbdata)) < 0)
+                return winret;
+            if (winret == 0) {
+                cli_msxmlmsg("msxml_read_cb: propagating fmap EOF [%llu]\n", (long long unsigned)wbytes);
+                return (int)wbytes;
+            }
+
+            rbytes = cbdata->winsize;
+        }
+
+#if MSXML_VERBIOSE
+        written = MIN(rbytes, want - wbytes);
+        cli_msxmlmsg("msxml_read_cb: copying from window [%llu(+%llu)] %llu->~%llu\n",
+                     (long long unsigned)(cbdata->winsize - rbytes), (long long unsigned)cbdata->winsize,
+                     (long long unsigned)cbdata->winpos, (long long unsigned)(cbdata->winpos + written));
+#endif
+
+        read_from = cbdata->window + cbdata->winpos;
+        state = &(cbdata->state);
+
+        while (rbytes > 0 && wbytes < want) {
+            switch (*state) {
+            case MSXML_STATE_NORMAL:
+                if ((*read_from) == '&')
+                    *state = MSXML_STATE_ENTITY_START_1;
+                break;
+            case MSXML_STATE_ENTITY_START_1:
+                if ((*read_from) == '#')
+                    *state = MSXML_STATE_ENTITY_START_2;
+                else
+                    *state = MSXML_STATE_NORMAL;
+                break;
+            case MSXML_STATE_ENTITY_START_2:
+                if ((*read_from) == 'x')
+                    *state = MSXML_STATE_ENTITY_HEX;
+                else if (((*read_from) >= '0') && ((*read_from) <= '9'))
+                    *state = MSXML_STATE_ENTITY_DEC;
+                else
+                    *state = MSXML_STATE_NORMAL;
+                break;
+            case MSXML_STATE_ENTITY_HEX:
+                if ((((*read_from) >= '0') && ((*read_from) <= '9')) ||
+                    (((*read_from) >= 'a') && ((*read_from) <= 'f')) ||
+                    (((*read_from) >= 'A') && ((*read_from) <= 'F'))) {}
+                else
+                    *state = MSXML_STATE_ENTITY_CLOSE;
+                break;
+            case MSXML_STATE_ENTITY_DEC:
+                if (((*read_from) >= '0') && ((*read_from) <= '9')) {}
+                else
+                    *state = MSXML_STATE_ENTITY_CLOSE;
+                break;
+            default:
+                cli_errmsg("unknown *state: %d\n", *state);
+                break;
+            }
+
+            if (*state == MSXML_STATE_ENTITY_CLOSE) {
+                if ((*read_from) != ';') {
+                    cli_msxmlmsg("msxml_read_cb: detected unterminated character entity @ winoff %d\n",
+                                 (int)(read_from - cbdata->window));
+                    (*write_to++) = ';';
+                    wbytes++;
+                }
+                *state = MSXML_STATE_NORMAL;
+                /* buffer full after the inserted ';': the pending byte is left
+                 * unread and re-evaluated in NORMAL state on the next call */
+                if (wbytes >= want)
+                    break;
+                /* the byte that ended the entity may itself open a new one;
+                 * track it here exactly as the next call would in the
+                 * buffer-full case above */
+                if ((*read_from) == '&')
+                    *state = MSXML_STATE_ENTITY_START_1;
+            }
+
+            *(write_to++) = *(read_from++);
+            rbytes--;
+            wbytes++;
+        }
+    }
+
+    cbdata->winpos = cbdata->winsize - rbytes;
+    return (int)wbytes;
+}
+#endif
+
+/*
+ * Scan the current fmap of ctx as an MS XML document.
+ *
+ * With libxml2 available, the map is streamed to an xmlTextReader through
+ * msxml_read_cb() and parsed by cli_msxml_parse_document() using the
+ * msxml_keys table with mode 1.  If the reader cannot be created, a JSON
+ * parse error is recorded when HAVE_JSON is set and its result is returned;
+ * otherwise CL_SUCCESS is returned.
+ *
+ * Without libxml2 the function only logs and returns CL_SUCCESS.
+ *
+ * Returns CL_ENULLARG when ctx is NULL, otherwise the parser's result.
+ */
+int cli_scanmsxml(cli_ctx *ctx)
+{
+#if HAVE_LIBXML2
+    struct msxml_cbdata cbdata;
+    xmlTextReaderPtr reader = NULL;
+    int ret = CL_SUCCESS;
+
+    cli_dbgmsg("in cli_scanmsxml()\n");
+
+    if (!ctx)
+        return CL_ENULLARG;
+
+    /* zeroed state: no window yet, entity tracker in MSXML_STATE_NORMAL */
+    memset(&cbdata, 0, sizeof(cbdata));
+    cbdata.map = *ctx->fmap;
+
+    reader = xmlReaderForIO(msxml_read_cb, NULL, &cbdata, "msxml.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
+    if (!reader) {
+        cli_dbgmsg("cli_scanmsxml: cannot intialize xmlReader\n");
+
+#if HAVE_JSON
+        ret = cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_IO");
+#endif
+        return ret; /* libxml2 failed! */
+    }
+
+    ret = cli_msxml_parse_document(ctx, reader, msxml_keys, num_msxml_keys, 1);
+
+    xmlTextReaderClose(reader);
+    xmlFreeTextReader(reader);
+    return ret;
+#else
+    UNUSEDPARAM(ctx);
+    cli_dbgmsg("in cli_scanmsxml()\n");
+    cli_dbgmsg("cli_scanmsxml: scanning msxml documents requires libxml2!\n");
+
+    return CL_SUCCESS;
+#endif
+}
--- a/libclamav/msxml.h
+++ b/libclamav/msxml.h
@ -0,0 +1,34 @@
+/*
+ *  Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
+ *
+ *  Copyright (C) 2007-2008 Sourcefire, Inc.
+ *
+ *  Authors: Kevin Lin
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *  MA 02110-1301, USA.
+ */
+
+#ifndef __MSXML_H
+#define __MSXML_H
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "others.h"
+
+/*
+ * Scan the current fmap of ctx as an MS XML document.
+ * Returns CL_ENULLARG when ctx is NULL; when built without libxml2 it only
+ * logs a message and returns CL_SUCCESS.
+ */
+int cli_scanmsxml(cli_ctx *ctx);
+
+#endif /* __MSXML_H */
--- a/libclamav/msxml_parser.c
+++ b/libclamav/msxml_parser.c
@ -0,0 +1,543 @@
+/*
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
+ *
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
+ *
+ * Authors: Kevin Lin
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "conv.h"
+#include "scanners.h"
+#include "json_api.h"
+#include "msxml_parser.h"
+
+#if HAVE_LIBXML2
+#ifdef _WIN32
+#ifndef LIBXML_WRITER_ENABLED
+#define LIBXML_WRITER_ENABLED 1
+#endif
+#endif
+#include <libxml/xmlreader.h>
+
+#define MSXML_VERBIOSE 0
+#if MSXML_VERBIOSE
+#define cli_msxmlmsg(...) cli_dbgmsg(__VA_ARGS__)
+#else
+#define cli_msxmlmsg(...)
+#endif
+
+/*
+ * Check the result of an xmlTextReader advance call (xmlTextReaderRead() or
+ * xmlTextReaderNext()).  NOTE: this macro returns from the ENCLOSING
+ * function: CL_EPARSE when state is -1, CL_BREAK when state is 0.  Any
+ * other value falls through.  The argument is expanded more than once, so
+ * pass a plain variable.
+ */
+#define check_state(state)                                              \
+    do {                                                                \
+        if (state == -1) {                                              \
+            cli_warnmsg("check_state[msxml]: CL_EPARSE @ ln%d\n", __LINE__); \
+            return CL_EPARSE;                                           \
+        }                                                               \
+        else if (state == 0) {                                          \
+            cli_dbgmsg("check_state[msxml]: CL_BREAK @ ln%d\n", __LINE__); \
+            return CL_BREAK;                                            \
+        }                                                               \
+    } while(0)
+
+
+struct key_entry blank_key = { NULL, NULL, 0 };
+
+/*
+ * Look up an element name in the key table of mxctx.
+ *
+ * The comparison is case-insensitive and requires the lengths to be equal.
+ * Names of MSXML_JSON_STRLEN_MAX bytes or more are never matched.
+ *
+ * Returns the matching table entry, or &blank_key when nothing matches.
+ */
+static const struct key_entry *msxml_check_key(struct msxml_ctx *mxctx, const xmlChar *key, size_t keylen)
+{
+    const struct key_entry *entry, *table_end;
+
+    if (keylen >= MSXML_JSON_STRLEN_MAX) {
+        cli_dbgmsg("msxml_check_key: key name too long\n");
+        return &blank_key;
+    }
+
+    table_end = mxctx->keys + mxctx->num_keys;
+    for (entry = mxctx->keys; entry != table_end; entry++) {
+        /* cheap length test first, then the case-insensitive compare */
+        if (strlen(entry->key) != keylen)
+            continue;
+        if (strncasecmp((char *)key, entry->key, keylen) == 0)
+            return entry;
+    }
+
+    return &blank_key;
+}
+
+/*
+ * xmlTextReader error callback: logs the message together with the base URI
+ * and line number taken from the locator.  Warnings and errors go to
+ * cli_warnmsg, unknown severities to cli_dbgmsg.
+ *
+ * arg is unused.
+ */
+static void msxml_error_handler(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator)
+{
+    int line = xmlTextReaderLocatorLineNumber(locator);
+    xmlChar *URI = xmlTextReaderLocatorBaseURI(locator);
+    /* the locator may have no base URI; never hand NULL to %s */
+    const char *uri_str = URI ? (const char *)URI : "(null)";
+
+    (void)arg;
+
+    switch (severity) {
+    case XML_PARSER_SEVERITY_WARNING:
+    case XML_PARSER_SEVERITY_VALIDITY_WARNING:
+        cli_warnmsg("%s:%d: parser warning : %s", uri_str, line, msg);
+        break;
+    case XML_PARSER_SEVERITY_ERROR:
+    case XML_PARSER_SEVERITY_VALIDITY_ERROR:
+        cli_warnmsg("%s:%d: parser error : %s", uri_str, line, msg);
+        break;
+    default:
+        cli_dbgmsg("%s:%d: unknown severity : %s", uri_str, line, msg);
+        break;
+    }
+
+    /* the URI was allocated by libxml2, so it is released with xmlFree() */
+    if (URI)
+        xmlFree(URI);
+}
+
+#if HAVE_JSON
+/*
+ * Decide whether a text value is a complete base-10 integer that fits in an
+ * int32_t.
+ *
+ * value - NUL-terminated string to test
+ * len   - length of value in bytes; the whole string must be consumed
+ * val   - receives the parsed number on success
+ *
+ * Returns 1 and stores the number in *val on success.  Returns 0, leaving
+ * *val untouched, for NULL or empty input, trailing non-numeric characters,
+ * or a value outside the int32_t range.
+ */
+static int msxml_is_int(const char *value, size_t len, int32_t *val)
+{
+    long parsed;
+    char *endptr = NULL;
+
+    if (!value || !val || !len)
+        return 0;
+
+    errno = 0;
+    parsed = strtol(value, &endptr, 10);
+
+    /* every byte of the value must belong to the number */
+    if (endptr != value+len)
+        return 0;
+
+    /* ERANGE covers platforms where long is no wider than int32_t */
+    if (errno == ERANGE || parsed < INT32_MIN || parsed > INT32_MAX)
+        return 0;
+
+    *val = (int32_t)parsed;
+
+    return 1;
+}
+
+/*
+ * Append a text value to the JSON array arrname under wrkptr.
+ *
+ * The value is stored as an integer when msxml_is_int() accepts it, as a
+ * boolean when it is exactly "true" or "false", and as a string otherwise.
+ *
+ * Returns CL_SUCCESS on success (a NULL node_value is ignored), CL_ENULLARG
+ * when wrkptr is NULL, or CL_EMEM when the array or the new JSON value
+ * cannot be obtained.
+ */
+static int msxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
+{
+    json_object *newobj, *arrobj;
+    int32_t val; /* matches the int32_t * expected by msxml_is_int() */
+
+    if (!wrkptr)
+        return CL_ENULLARG;
+
+    /* a node without a value has nothing to record */
+    if (!node_value)
+        return CL_SUCCESS;
+
+    arrobj = cli_jsonarray(wrkptr, arrname);
+    if (arrobj == NULL) {
+        return CL_EMEM;
+    }
+
+    if (msxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
+        newobj = json_object_new_int(val);
+    }
+    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
+        newobj = json_object_new_boolean(1);
+    }
+    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
+        newobj = json_object_new_boolean(0);
+    }
+    else {
+        newobj = json_object_new_string((const char *)node_value);
+    }
+
+    if (NULL == newobj) {
+        cli_errmsg("msxml_parse_value: no memory for json value for [%s]\n", arrname);
+        return CL_EMEM;
+    }
+
+    json_object_array_add(arrobj, newobj);
+    return CL_SUCCESS;
+}
+#endif /* HAVE_JSON */
+
+/*
+ * Process the node the reader is positioned on and, for an element, its
+ * whole subtree (recursively, up to MSXML_RECLEVEL_MAX levels).
+ *
+ * For elements found in the key table of mxctx the flags of the entry decide
+ * what happens: the subtree may be skipped (MSXML_IGNORE_ELEM), a JSON object
+ * may be created and filled with a counter, attributes and text values
+ * (MSXML_JSON_*), and text content may be base64-decoded, written to a
+ * temporary file and scanned (MSXML_SCAN_B64).
+ *
+ * mxctx  - parser context (scan context, key table, JSON state)
+ * reader - positioned xmlTextReader; it is advanced past what was consumed
+ * rlvl   - current recursion depth, 0 for the top level
+ * jptr   - JSON object of the enclosing element (json_object * when
+ *          HAVE_JSON is set, otherwise unused)
+ *
+ * Returns CL_SUCCESS or CL_VIRUS on normal completion, CL_BREAK when the
+ * reader reports the end of the document, CL_EPARSE / CL_EFORMAT on
+ * malformed input, CL_ETIMEOUT when the JSON timeout check fails, or another
+ * error code propagated from the helpers called here.
+ */
+static int msxml_parse_element(struct msxml_ctx *mxctx, xmlTextReaderPtr reader, int rlvl, void *jptr)
+{
+    const xmlChar *element_name = NULL;
+    const xmlChar *node_name = NULL, *node_value = NULL;
+    const struct key_entry *keyinfo;
+    int ret, virus = 0, state, node_type, endtag = 0;
+    cli_ctx *ctx = mxctx->ctx;
+#if HAVE_JSON
+    json_object *parent = (json_object *)jptr;
+    json_object *thisjobj = NULL;
+#else
+    void *thisjobj = NULL;
+#endif
+
+    cli_msxmlmsg("in msxml_parse_element @ layer %d\n", rlvl);
+
+    /* check recursion level */
+    if (rlvl >= MSXML_RECLEVEL_MAX) {
+        cli_dbgmsg("msxml_parse_element: reached msxml json recursion limit\n");
+
+#if HAVE_JSON
+        if (mxctx->mode) {
+            int tmp = cli_json_parse_error(mxctx->root, "MSXML_RECURSIVE_LIMIT");
+            if (tmp != CL_SUCCESS)
+                return tmp;
+        }
+#endif
+
+        /* skip it */
+        state = xmlTextReaderNext(reader);
+        check_state(state);
+        return CL_SUCCESS;
+    }
+
+    /* acquire element type */
+    node_type = xmlTextReaderNodeType(reader);
+    if (node_type == -1)
+        return CL_EPARSE;
+
+    node_name = xmlTextReaderConstLocalName(reader);
+    node_value = xmlTextReaderConstValue(reader);
+
+    /* branch on node type */
+    switch (node_type) {
+    case XML_READER_TYPE_ELEMENT:
+        cli_msxmlmsg("msxml_parse_element: ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);
+
+        /* storing the element name for verification/collection */
+        element_name = node_name;
+        if (!element_name) {
+            cli_dbgmsg("msxml_parse_element: element tag node nameless\n");
+#if HAVE_JSON
+            if (mxctx->mode) {
+                int tmp = cli_json_parse_error(mxctx->root, "MSXML_NAMELESS_ELEMENT");
+                if (tmp != CL_SUCCESS)
+                    return tmp;
+            }
+#endif
+            return CL_EPARSE; /* no name, nameless */
+        }
+
+        /* determine if the element is interesting */
+        keyinfo = msxml_check_key(mxctx, element_name, xmlStrlen(element_name));
+
+        cli_msxmlmsg("key:  %s\n", keyinfo->key);
+        cli_msxmlmsg("name: %s\n", keyinfo->name);
+        cli_msxmlmsg("type: 0x%x\n", keyinfo->type);
+
+        /* element and contents are ignored */
+        if (keyinfo->type & MSXML_IGNORE_ELEM) {
+            cli_msxmlmsg("msxml_parse_element: IGNORING ELEMENT %s\n", keyinfo->name);
+
+            state = xmlTextReaderNext(reader);
+            check_state(state);
+            return CL_SUCCESS;
+        }
+
+#if HAVE_JSON
+        if (mxctx->mode && (keyinfo->type & MSXML_JSON_TRACK)) {
+            /* the JSON object hangs either off the document root or off the
+             * object of the enclosing element */
+            if (keyinfo->type & MSXML_JSON_ROOT)
+                thisjobj = cli_jsonobj(mxctx->root, keyinfo->name);
+            else if (keyinfo->type & MSXML_JSON_WRKPTR)
+                thisjobj = cli_jsonobj(parent, keyinfo->name);
+
+            if (!thisjobj) {
+                return CL_EMEM;
+            }
+            cli_msxmlmsg("msxml_parse_element: generated json object [%s]\n", keyinfo->name);
+
+            /* count this element */
+            if (thisjobj && (keyinfo->type & MSXML_JSON_COUNT)) {
+                json_object *counter = NULL;
+
+                if (!json_object_object_get_ex(thisjobj, "Count", &counter)) { /* object not found */
+                    cli_jsonint(thisjobj, "Count", 1);
+                } else {
+                    int value = json_object_get_int(counter);
+                    cli_jsonint(thisjobj, "Count", value+1);
+                }
+                cli_msxmlmsg("msxml_parse_element: retrieved json object [Count]\n");
+            }
+
+            /* handle attributes */
+            if (thisjobj && (keyinfo->type & MSXML_JSON_ATTRIB)) {
+                state = xmlTextReaderHasAttributes(reader);
+                if (state == 1) {
+                    json_object *attributes;
+                    const xmlChar *name, *value;
+
+                    attributes = cli_jsonobj(thisjobj, "Attributes");
+                    if (!attributes) {
+                        return CL_EPARSE;
+                    }
+                    cli_msxmlmsg("msxml_parse_element: retrieved json object [Attributes]\n");
+
+                    while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
+                        name = xmlTextReaderConstLocalName(reader);
+                        value = xmlTextReaderConstValue(reader);
+
+                        cli_msxmlmsg("\t%s: %s\n", name, value);
+                        cli_jsonstr(attributes, (const char *)name, (const char *)value);
+                    }
+                }
+                else if (state == -1)
+                    return CL_EPARSE;
+            }
+        }
+#endif
+
+        /* check self-containment (the attribute walk above may have left the
+         * reader positioned on an attribute node) */
+        state = xmlTextReaderMoveToElement(reader);
+        if (state == -1)
+            return CL_EPARSE;
+
+        state = xmlTextReaderIsEmptyElement(reader);
+        if (state == 1) {
+            cli_msxmlmsg("msxml_parse_element: SELF-CLOSING\n");
+
+            state = xmlTextReaderNext(reader);
+            check_state(state);
+            return CL_SUCCESS;
+        } else if (state == -1)
+            return CL_EPARSE;
+
+        /* advance to first content node */
+        state = xmlTextReaderRead(reader);
+        check_state(state);
+
+        while (!endtag) {
+#if HAVE_JSON
+            if (mxctx->mode && (cli_json_timeout_cycle_check(mxctx->ctx, &(mxctx->toval)) != CL_SUCCESS))
+                return CL_ETIMEOUT;
+#endif
+
+            node_type = xmlTextReaderNodeType(reader);
+            if (node_type == -1)
+                return CL_EPARSE;
+
+            switch (node_type) {
+            case XML_READER_TYPE_ELEMENT:
+                ret = msxml_parse_element(mxctx, reader, rlvl+1, thisjobj);
+                /* NOTE(review): CL_VIRUS already satisfies ret != CL_SUCCESS,
+                 * so the SCAN_ALL branch below is never taken and a detection
+                 * always unwinds to the caller.  Left as is on purpose. */
+                if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) {
+                    return ret;
+                } else if (SCAN_ALL && ret == CL_VIRUS) {
+                    virus = 1;
+                }
+                break;
+
+            case XML_READER_TYPE_TEXT:
+                node_value = xmlTextReaderConstValue(reader);
+
+                cli_msxmlmsg("TEXT: %s\n", node_value);
+
+#if HAVE_JSON
+                if (thisjobj && node_value && (keyinfo->type & MSXML_JSON_VALUE)) {
+
+                    ret = msxml_parse_value(thisjobj, "Value", node_value);
+                    if (ret != CL_SUCCESS)
+                        return ret;
+
+                    cli_msxmlmsg("msxml_parse_element: added json value [%s: %s]\n", keyinfo->name, (const char *)node_value);
+                }
+#endif
+
+                /* scanning protocol for embedded objects encoded in base64 */
+                if (node_value && (keyinfo->type & MSXML_SCAN_B64)) {
+                    char *decoded, *tempfile = NULL;
+                    size_t decodedlen;
+                    int of;
+
+                    cli_msxmlmsg("BINARY DATA!\n");
+
+                    decoded = (char *)cl_base64_decode((char *)node_value, strlen((const char *)node_value), NULL, &decodedlen, 0);
+                    if (!decoded) {
+                        cli_warnmsg("msxml_parse_element: failed to decode base64-encoded binary data\n");
+                        state = xmlTextReaderRead(reader);
+                        check_state(state);
+                        break;
+                    }
+
+                    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tempfile, &of)) != CL_SUCCESS) {
+                        cli_warnmsg("msxml_parse_element: failed to create temporary file %s\n", tempfile ? tempfile : "(null)");
+                        free(decoded);
+                        return ret;
+                    }
+
+                    if(cli_writen(of, decoded, decodedlen) != (int)decodedlen) {
+                        free(decoded);
+                        close(of);
+                        /* do not leave the partial file or its name behind */
+                        if (ctx && !(ctx->engine->keeptmp))
+                            cli_unlink(tempfile);
+                        free(tempfile);
+                        return CL_EWRITE;
+                    }
+                    free(decoded);
+
+                    cli_dbgmsg("msxml_parse_element: extracted binary data to %s\n", tempfile);
+
+                    ret = cli_magic_scandesc(of, ctx);
+                    close(of);
+                    if (ctx && !(ctx->engine->keeptmp))
+                        cli_unlink(tempfile);
+                    free(tempfile);
+                    /* NOTE(review): as above, the SCAN_ALL branch is
+                     * unreachable because CL_VIRUS != CL_SUCCESS. */
+                    if (ret != CL_SUCCESS || (!SCAN_ALL && ret == CL_VIRUS)) {
+                        return ret;
+                    } else if (SCAN_ALL && ret == CL_VIRUS) {
+                        virus = 1;
+                    }
+                }
+
+                /* advance to next node */
+                state = xmlTextReaderRead(reader);
+                check_state(state);
+                break;
+
+            case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
+                /* advance to next node */
+                state = xmlTextReaderRead(reader);
+                check_state(state);
+                break;
+
+            case XML_READER_TYPE_END_ELEMENT:
+                cli_msxmlmsg("in msxml_parse_element @ layer %d closed\n", rlvl);
+                node_name = xmlTextReaderConstLocalName(reader);
+                if (!node_name) {
+                    cli_dbgmsg("msxml_parse_element: element end tag node nameless\n");
+                    return CL_EPARSE; /* no name, nameless */
+                }
+
+                if (xmlStrcmp(element_name, node_name)) {
+                    cli_dbgmsg("msxml_parse_element: element tag does not match end tag %s != %s\n", element_name, node_name);
+                    return CL_EFORMAT;
+                }
+
+                /* advance to next element tag */
+                state = xmlTextReaderRead(reader);
+                check_state(state);
+
+                endtag = 1;
+                break;
+
+            default:
+                node_name = xmlTextReaderConstLocalName(reader);
+                node_value = xmlTextReaderConstValue(reader);
+
+                cli_dbgmsg("msxml_parse_element: unhandled xml secondary node %s [%d]: %s\n", node_name, node_type, node_value);
+
+                /* an unhandled node ends processing of this element */
+                state = xmlTextReaderNext(reader);
+                check_state(state);
+                return (virus ? CL_VIRUS : CL_SUCCESS);
+            }
+        }
+
+        break;
+    case XML_READER_TYPE_PROCESSING_INSTRUCTION:
+        cli_msxmlmsg("msxml_parse_element: PROCESSING INSTRUCTION %s [%d]: %s\n", node_name, node_type, node_value);
+        break;
+    case XML_READER_TYPE_SIGNIFICANT_WHITESPACE:
+        cli_msxmlmsg("msxml_parse_element: SIGNIFICANT WHITESPACE %s [%d]: %s\n", node_name, node_type, node_value);
+        break;
+    case XML_READER_TYPE_END_ELEMENT:
+        cli_msxmlmsg("msxml_parse_element: END ELEMENT %s [%d]: %s\n", node_name, node_type, node_value);
+        return (virus ? CL_VIRUS : CL_SUCCESS);
+    default:
+        cli_dbgmsg("msxml_parse_element: unhandled xml primary node %s [%d]: %s\n", node_name, node_type, node_value);
+    }
+
+    return (virus ? CL_VIRUS : CL_SUCCESS);
+}
+
+/*
+ * Parse an MS XML document from an already opened reader.
+ * Reader initialization and closing are handled by the caller.
+ *
+ * ctx      - scan context
+ * reader   - xmlTextReader positioned before the first node
+ * keys     - table of elements of interest
+ * num_keys - number of entries in keys
+ * mode     - non-zero requests JSON property collection; it is only honoured
+ *            when HAVE_JSON is set and ctx->wrkproperty is non-NULL
+ *
+ * Returns CL_VIRUS when a detection was recorded.  CL_ETIMEOUT, CL_BREAK and
+ * CL_EPARSE results reaching the end of the function are suppressed to
+ * CL_SUCCESS so the scan can continue; other errors are returned as is.  A
+ * failure to record a JSON parse error is returned directly, as is a
+ * CL_ETIMEOUT from the timeout check at the top of the main loop.
+ */
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode)
+{
+    struct msxml_ctx mxctx;
+    int state, virus = 0, ret = CL_SUCCESS;
+
+    mxctx.ctx = ctx;
+    mxctx.keys = keys;
+    mxctx.num_keys = num_keys;
+#if HAVE_JSON
+    /* set every JSON field: root is handed to msxml_parse_element() below
+     * even when JSON collection is off */
+    mxctx.mode = mode;
+    mxctx.root = NULL;
+    mxctx.toval = 0;
+    if (mode) {
+        mxctx.root = ctx->wrkproperty;
+        /* JSON Sanity Check */
+        if (!mxctx.root)
+            mxctx.mode = 0;
+    }
+#else
+    (void)mode;
+#endif
+
+    /* Error Handler */
+    xmlTextReaderSetErrorHandler(reader, NULL, NULL); /* xml default handler */
+    //xmlTextReaderSetErrorHandler(reader, msxml_error_handler, NULL);
+
+    /* Main Processing Loop */
+    while ((state = xmlTextReaderRead(reader)) == 1) {
+#if HAVE_JSON
+        if (mxctx.mode && (cli_json_timeout_cycle_check(mxctx.ctx, &(mxctx.toval)) != CL_SUCCESS))
+            return CL_ETIMEOUT;
+
+        ret = msxml_parse_element(&mxctx, reader, 0, mxctx.root);
+#else
+        ret = msxml_parse_element(&mxctx, reader, 0, NULL);
+#endif
+        if (ret == CL_SUCCESS)
+            continue;
+
+        if (SCAN_ALL && ret == CL_VIRUS) {
+            /* non-allmatch simply propagates it down to return through ret */
+            virus = 1;
+            continue;
+        }
+
+        if (ret == CL_VIRUS || ret == CL_ETIMEOUT || ret == CL_BREAK)
+            cli_dbgmsg("cli_msxml_parse_document: encountered halt event in parsing xml document\n");
+        else
+            cli_warnmsg("cli_msxml_parse_document: encountered issue in parsing xml document\n");
+        break;
+    }
+
+    /* the reader itself failed; this overrides whatever the last element returned */
+    if (state == -1)
+        ret = CL_EPARSE;
+
+#if HAVE_JSON
+    /* Parse General Error Handler */
+    if (mxctx.mode) {
+        int tmp = CL_SUCCESS;
+
+        switch(ret) {
+        case CL_SUCCESS:
+        case CL_BREAK: /* OK */
+            break;
+        case CL_VIRUS:
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_VIRUS");
+            break;
+        case CL_ETIMEOUT:
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_INTR_TIMEOUT");
+            break;
+        case CL_EPARSE:
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_XMLPARSER");
+            break;
+        case CL_EMEM:
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OUTOFMEM");
+            break;
+        case CL_EFORMAT:
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_MALFORMED");
+            break;
+        default:
+            tmp = cli_json_parse_error(mxctx.root, "MSXML_ERROR_OTHER");
+            break;
+        }
+
+        if (tmp)
+            return tmp;
+    }
+#endif
+
+    /* non-critical return suppression */
+    if (ret == CL_ETIMEOUT || ret == CL_BREAK)
+        ret = CL_SUCCESS;
+
+    /* important but non-critical suppression */
+    if (ret == CL_EPARSE) {
+        cli_dbgmsg("cli_msxml_parse_document: suppressing parsing error to continue scan\n");
+        ret = CL_SUCCESS;
+    }
+
+    return (virus ? CL_VIRUS : ret);
+}
+
+#endif /* HAVE_LIBXML2 */
--- a/libclamav/msxml_parser.h
+++ b/libclamav/msxml_parser.h
@ -0,0 +1,80 @@
+/*
+ * Extract component parts of MS XML files (e.g. MS Office 2003 XML Documents)
+ * 
+ * Copyright (C) 2007-2013 Sourcefire, Inc.
+ * 
+ * Authors: Kevin Lin
+ * 
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ * 
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ * 
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __MSXML_PARSER_H
+#define __MSXML_PARSER_H
+
+/*
+ * Pull in the build configuration before testing any feature macro: the
+ * HAVE_LIBXML2 / HAVE_JSON checks below are expected to be answered by this
+ * header, so it must not sit inside the HAVE_LIBXML2 guard.
+ */
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#if HAVE_LIBXML2
+
+#include "others.h"
+#include "json_api.h"
+
+#ifdef _WIN32
+#ifndef LIBXML_WRITER_ENABLED
+#define LIBXML_WRITER_ENABLED 1
+#endif
+#endif
+#include <libxml/xmlreader.h>
+
+
+/* limits applied by the parser (NOTE(review): presumably the maximum element
+ * nesting depth and the maximum key/string length - confirm in msxml_parser.c) */
+#define MSXML_RECLEVEL_MAX 20
+#define MSXML_JSON_STRLEN_MAX 128
+
+/*
+ * One entry of a caller-supplied key table: maps an XML element name to the
+ * name used for it in the output and to a bitmask describing its handling.
+ * The MSXML_* flags below are OR-ed together into 'type'; they are grouped
+ * into "how" the element is processed, "where" its JSON object is attached
+ * and "what" is recorded for it.
+ */
+struct key_entry {
+/* how */
+#define MSXML_IGNORE       0x00
+#define MSXML_IGNORE_ELEM  0x01
+#define MSXML_SCAN_B64     0x02
+/* where */
+#define MSXML_JSON_ROOT    0x04
+#define MSXML_JSON_WRKPTR  0x08
+
+/* set when the element is tracked in JSON at either location */
+#define MSXML_JSON_TRACK (MSXML_JSON_ROOT | MSXML_JSON_WRKPTR)
+/* what */
+#define MSXML_JSON_COUNT   0x10
+#define MSXML_JSON_VALUE   0x20
+#define MSXML_JSON_ATTRIB  0x40
+
+    const char *key;  /* XML element name to match */
+    const char *name; /* name reported for the element */
+    int type;         /* bitmask of the MSXML_* flags above */
+};
+
+/*
+ * Parser state shared across one cli_msxml_parse_document() run.
+ */
+struct msxml_ctx {
+    cli_ctx *ctx;                 /* scan context passed in by the caller */
+    const struct key_entry *keys; /* key table passed in by the caller */
+    size_t num_keys;              /* number of entries in 'keys' */
+
+#if HAVE_JSON
+    json_object *root; /* JSON object that receives parse-error records */
+    int mode, toval;   /* mode: non-zero enables JSON error reporting;
+                        * toval: NOTE(review): presumably the timeout-check
+                        * counter - confirm in msxml_parser.c */
+#endif
+};
+
+/*
+ * Walk the XML document behind 'reader', handling the elements listed in
+ * 'keys' (num_keys entries).
+ *
+ * A non-zero 'mode' makes the parser record the reason for an interrupted or
+ * failed parse in the JSON output (HAVE_JSON builds only).
+ *
+ * Returns CL_VIRUS when a detection was made; timeouts, CL_BREAK and XML
+ * parse errors are suppressed to CL_SUCCESS so the scan can continue; other
+ * error codes are passed through.
+ */
+int cli_msxml_parse_document(cli_ctx *ctx, xmlTextReaderPtr reader, const struct key_entry *keys, const size_t num_keys, int mode);
+
+#endif /* HAVE_LIBXML2 */
+
+#endif /* __MSXML_PARSER_H */
--- a/libclamav/ole2_extract.c
+++ b/libclamav/ole2_extract.c
@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <conv.h>
+#include <zlib.h>
 #ifdef	HAVE_UNISTD_H
 #include <unistd.h>
 #endif
@ -955,6 +956,168 @@ handler_enum(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx *
    return CL_SUCCESS;
 }

+/*
+ * Cheap heuristic for an MSO stream: a 4-byte prefix followed by a zlib
+ * header.  Only the two bytes at offsets 4 and 5 are inspected; they must be
+ * 0x78 0x9C.
+ *
+ * The file offset of fd is moved and NOT restored; the caller has to seek
+ * back before reusing the descriptor.
+ *
+ * Returns 1 when the stream looks like MSO data, 0 otherwise (including on
+ * any seek/read failure).
+ */
+static int
+likely_mso_stream(int fd)
+{
+    unsigned char magic[2];
+    off_t length = lseek(fd, 0, SEEK_END);
+
+    if (length == -1) {
+        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
+        return 0;
+    }
+
+    /* too short to hold the 4-byte prefix plus the 2 header bytes */
+    if (length < 6)
+        return 0;
+
+    if (lseek(fd, 4, SEEK_SET) == -1) {
+        cli_dbgmsg("likely_mso_stream: call to lseek() failed\n");
+        return 0;
+    }
+
+    if (cli_readn(fd, magic, 2) != 2) {
+        cli_dbgmsg("likely_mso_stream: reading from fd failed\n");
+        return 0;
+    }
+
+    return (magic[0] == 0x78 && magic[1] == 0x9C) ? 1 : 0;
+}
+
+/*
+ * Inflate an MSO stream (4-byte size prefix followed by zlib data) from fd
+ * into a temporary file and scan the inflated data.
+ *
+ * fd  - descriptor of the extracted stream; must be >= 0
+ * ctx - scan context; supplies the temp directory, the scan limits and the
+ *       keeptmp setting
+ *
+ * Inflation stops early when cli_checklimits() rejects the output size; a
+ * decompression error is tolerated as long as some data was produced, in
+ * which case the partial output is scanned.
+ *
+ * Returns the result of cli_magic_scandesc() on the inflated data, or
+ * CL_ENULLARG / CL_ESTAT / CL_EMAP / the cli_gentempfd() error / CL_EREAD /
+ * CL_EWRITE / CL_EUNPACK on failure.  A failing inflateEnd() (CL_EUNPACK) or
+ * cli_unlink() (CL_EUNLINK) during clean-up replaces whatever result was
+ * pending.
+ */
+static int
+scan_mso_stream(int fd, cli_ctx *ctx)
+{
+    int zret, ofd, ret = CL_SUCCESS;
+    fmap_t *input;
+    off_t off_in = 0;
+    size_t count, outsize = 0;
+    z_stream zstrm;
+    char *tmpname;
+    uint32_t prefix;
+    unsigned char inbuf[FILEBUFF], outbuf[FILEBUFF];
+
+    /* fmap the input file for easier manipulation */
+    if (fd < 0) {
+        cli_dbgmsg("scan_mso_stream: Invalid file descriptor argument\n");
+        return CL_ENULLARG;
+    } else {
+        STATBUF statbuf;
+
+        if (FSTAT(fd, &statbuf) == -1) {
+            cli_dbgmsg("scan_mso_stream: Can't stat file descriptor\n");
+            return CL_ESTAT;
+        }
+
+        input = fmap(fd, 0, statbuf.st_size);
+        if (!input) {
+            cli_dbgmsg("scan_mso_stream: Failed to get fmap for input stream\n");
+            return CL_EMAP;
+        }
+    }
+
+    /* reserve tempfile for output and scanning */
+    if ((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &ofd)) != CL_SUCCESS) {
+        cli_errmsg("scan_mso_stream: Can't generate temporary file\n");
+        funmap(input);
+        return ret;
+    }
+
+    /* initialize zlib inflation stream */
+    memset(&zstrm, 0, sizeof(zstrm));
+    zstrm.zalloc = Z_NULL;
+    zstrm.zfree = Z_NULL;
+    zstrm.opaque = Z_NULL;
+    zstrm.next_in = inbuf;
+    zstrm.next_out = outbuf;
+    zstrm.avail_in = 0;
+    zstrm.avail_out = FILEBUFF;
+
+    zret = inflateInit(&zstrm);
+    if (zret != Z_OK) {
+        cli_dbgmsg("scan_mso_stream: Can't initialize zlib inflation stream\n");
+        ret = CL_EUNPACK;
+        goto mso_end;
+    }
+
+    /* extract 32-bit prefix */
+    /* NOTE(review): the prefix is used in host byte order as read; confirm
+     * whether a little-endian conversion is needed on big-endian hosts */
+    if (fmap_readn(input, &prefix, off_in, sizeof(prefix)) != sizeof(prefix)) {
+        cli_dbgmsg("scan_mso_stream: Can't extract 4-byte prefix\n");
+        ret = CL_EREAD;
+        goto mso_end;
+    }
+    off_in += sizeof(uint32_t);
+    /* prefix is unsigned: print it with unsigned conversions */
+    cli_dbgmsg("scan_mso_stream: stream prefix = %08x(%u)\n", (unsigned)prefix, (unsigned)prefix);
+
+    /* inflation loop */
+    do {
+        /* refill the input buffer once zlib has consumed it */
+        if (zstrm.avail_in == 0) {
+            zstrm.next_in = inbuf;
+            ret = fmap_readn(input, inbuf, off_in, FILEBUFF);
+            if (ret < 0) {
+                cli_errmsg("scan_mso_stream: Error reading MSO file\n");
+                ret = CL_EUNPACK;
+                goto mso_end;
+            }
+            if (!ret)
+                break;
+
+            zstrm.avail_in = ret;
+            off_in += ret;
+        }
+        zret = inflate(&zstrm, Z_SYNC_FLUSH);
+        count = FILEBUFF - zstrm.avail_out;
+        if (count) {
+            /* stop inflating (but still scan what we have) once limits are hit */
+            if (cli_checklimits("MSO", ctx, outsize + count, 0, 0) != CL_SUCCESS)
+                break;
+            if (cli_writen(ofd, outbuf, count) != count) {
+                cli_errmsg("scan_mso_stream: Can't write to file %s\n", tmpname);
+                ret = CL_EWRITE;
+                goto mso_end;
+            }
+            outsize += count;
+        }
+        zstrm.next_out = outbuf;
+        zstrm.avail_out = FILEBUFF;
+    } while(zret == Z_OK);
+
+    /* post inflation checks */
+    if (zret != Z_STREAM_END && zret != Z_OK) {
+        if (outsize == 0) {
+            cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. No data decompressed.\n");
+            ret = CL_EUNPACK;
+            goto mso_end;
+        }
+
+        cli_infomsg(ctx, "scan_mso_stream: Error decompressing MSO file. Scanning what was decompressed.\n");
+    }
+    /* outsize is a size_t: cast to match the conversion specifier */
+    cli_dbgmsg("scan_mso_stream: Decompressed to %s, size %llu\n", tmpname, (long long unsigned)outsize);
+
+    if (outsize != prefix) {
+        cli_warnmsg("scan_mso_stream: declared prefix != inflated stream size, %llu != %llu\n",
+                    (long long unsigned)prefix, (long long unsigned)outsize);
+    } else {
+        cli_dbgmsg("scan_mso_stream: declared prefix == inflated stream size, %llu == %llu\n",
+                   (long long unsigned)prefix, (long long unsigned)outsize);
+    }
+
+    /* scanning inflated stream */
+    ret = cli_magic_scandesc(ofd, ctx);
+
+    /* clean-up */
+ mso_end:
+    zret = inflateEnd(&zstrm);
+    if (zret != Z_OK)
+        ret = CL_EUNPACK;
+    close(ofd);
+    if(ctx && !ctx->engine->keeptmp)
+        if (cli_unlink(tmpname))
+            ret = CL_EUNLINK;
+    free(tmpname);
+    funmap(input);
+    return ret;
+}

 static int
 handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * ctx)
@ -962,7 +1125,7 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
    char           *tempfile;
    unsigned char  *buff;
    int32_t         current_block, len, offset;
-    int             ofd, ret;
+    int             ofd, is_mso, ret;
    bitset_t       *blk_bitset;

    UNUSEDPARAM(dir);
@ -1061,6 +1224,7 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
        }
    }

+    is_mso = likely_mso_stream(ofd);
    if (lseek(ofd, 0, SEEK_SET) == -1) {
        close(ofd);
        if (ctx && !(ctx->engine->keeptmp))
@ -1112,8 +1276,18 @@ handler_otf(ole2_header_t * hdr, property_t * prop, const char *dir, cli_ctx * c
    }
 #endif

-    /* Normal File Scan */
-    ret = cli_magic_scandesc(ofd, ctx);
+    if (is_mso < 0) {
+        ret = CL_ESEEK;
+    } else if (is_mso) {
+        /* MSO Stream Scan */
+        ret = scan_mso_stream(ofd, ctx);
+        /* CONSIDER: running cli_magic_scandesc in the chance of MSO fp? */
+        //if (ret != CL_SUCCESS || ret != CL_VIRUS)
+        //ret = cli_magic_scandesc(ofd, ctx);
+    } else {
+        /* Normal File Scan */
+        ret = cli_magic_scandesc(ofd, ctx);
+    }
    close(ofd);
    free(buff);
    cli_bitset_free(blk_bitset);
--- a/libclamav/ooxml.c
+++ b/libclamav/ooxml.c
@ -31,7 +31,7 @@
 #include "json.h"
 #endif
 #include "json_api.h"
-
+#include "msxml_parser.h"
 #include "ooxml.h"

 #if HAVE_LIBXML2
@ -43,366 +43,59 @@
 #include <libxml/xmlreader.h>
 #endif

-#define OOXML_DEBUG 0
+

 #if HAVE_LIBXML2 && HAVE_JSON

-#define OOXML_JSON_RECLEVEL 16
-#define OOXML_JSON_RECLEVEL_MAX 5
-#define OOXML_JSON_STRLEN_MAX 100
+static const struct key_entry ooxml_keys[] = {
+    { "coreproperties",     "CoreProperties",     MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
+    { "title",              "Title",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "subject",            "Subject",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "creator",            "Author",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "keywords",           "Keywords",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "comments",           "Comments",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "description",        "Description",        MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "lastmodifiedby",     "LastAuthor",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "revision",           "Revision",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "created",            "Created",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "modified",           "Modified",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "category",           "Category",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "contentstatus",      "ContentStatus",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },

-#define check_state(state)                                              \
-    do {                                                                \
-        if (state == -1) {                                              \
-            cli_warnmsg("check_state: CL_EPARSE @ ln%d\n", __LINE__);   \
-            return CL_EPARSE;                                           \
-        }                                                               \
-        else if (state == 0) {                                          \
-            cli_dbgmsg("check_state: CL_BREAK @ ln%d\n", __LINE__);     \
-            return CL_BREAK;                                            \
-        }                                                               \
-    } while(0)
+    { "properties",         "ExtendedProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
+    { "application",        "Application",        MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "appversion",         "AppVersion",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "characters",         "Characters",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "characterswithspaces", "CharactersWithSpaces", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "company",            "Company",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "digsig",             "DigSig",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "docsecurity",        "DocSecurity",        MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    //{ "headingpairs",       "HeadingPairs",       MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "hiddenslides",       "HiddenSlides",       MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "hlinks",             "HLinks",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "hyperlinkbase",      "HyperlinkBase",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "hyperlinkschanged",  "HyperlinksChanged",  MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "lines",              "Lines",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "linksuptodate",      "LinksUpToDate",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "manager",            "Manager",            MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "mmclips",            "MultimediaClips",    MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "notes",              "Notes",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "pages",              "Pages",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "paragraphs",         "Paragraphs",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "presentationformat", "PresentationFormat", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    //{ "properties",         "Properties",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "scalecrop",          "ScaleCrop",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "shareddoc",          "SharedDocs",         MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "slides",             "Slides",             MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "template",           "Template",           MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    //{ "titleofparts",       "TitleOfParts",       MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "totaltime",          "TotalTime",          MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
+    { "words",              "Words",              MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },

-static int ooxml_is_int(const char *value, size_t len, int32_t *val)
-{
-    long val2;
-    char *endptr = NULL;
-
-    val2 = strtol(value, &endptr, 10);
-    if (endptr != value+len) {
-        return 0;
-    }
-
-    *val = (int32_t)(val2 & 0x0000ffff);
-
-    return 1;
-}
-
-static int ooxml_add_parse_error(json_object *wrkptr, const xmlChar *errstr)
-{
-    json_object *perr;
-
-    if (!wrkptr)
-        return CL_ENULLARG;
-
-    perr = cli_jsonarray(wrkptr, "ParseErrors");
-    if (perr == NULL) {
-        return CL_EMEM;
-    }
-
-    return cli_jsonstr(perr, NULL, errstr);
-}
-
-static int ooxml_parse_value(json_object *wrkptr, const char *arrname, const xmlChar *node_value)
-{
-    json_object *newobj, *arrobj;
-    int val;
-
-    if (!wrkptr)
-        return CL_ENULLARG;
-
-    arrobj = cli_jsonarray(wrkptr, arrname);
-    if (arrobj == NULL) {
-        return CL_EMEM;
-    }
-
-    if (ooxml_is_int((const char *)node_value, xmlStrlen(node_value), &val)) {
-        newobj = json_object_new_int(val);
-    }
-    else if (!xmlStrcmp(node_value, (const xmlChar *)"true")) {
-        newobj = json_object_new_boolean(1);
-    }
-    else if (!xmlStrcmp(node_value, (const xmlChar *)"false")) {
-        newobj = json_object_new_boolean(0);
-    }
-    else {
-        newobj = json_object_new_string((const char *)node_value);
-    }
-
-    if (NULL == newobj) {
-        cli_errmsg("ooxml_parse_value: no memory for json value for [%s]\n", arrname);
-        return CL_EMEM;
-    }
-
-    json_object_array_add(arrobj, newobj);
-    return CL_SUCCESS;
-}
-
-static const char *ooxml_keys[] = {
-    "coreproperties",
-    "title",
-    "subject",
-    "creator",
-    "keywords",
-    "comments",
-    "description",
-    "lastmodifiedby",
-    "revision",
-    "created",
-    "modified",
-    "category",
-    "contentstatus",
-
-    "properties",
-    "application",
-    "appversion",
-    "characters",
-    "characterswithspaces",
-    "company",
-    "digsig",
-    "docsecurity",
-    //"headingpairs",
-    "hiddenslides",
-    "hlinks",
-    "hyperlinkbase",
-    "hyperlinkschanged",
-    "lines",
-    "linksuptodate",
-    "manager",
-    "mmclips",
-    "notes",
-    "pages",
-    "paragraphs",
-    "presentationformat",
-    "properties",
-    "scalecrop",
-    "shareddoc",
-    "slides",
-    "template",
-    //"titlesofparts",
-    "totaltime",
-    "words"
+    /* Should NOT Exist */
+    { "bindata",            "BinaryData",         MSXML_SCAN_B64 | MSXML_JSON_COUNT | MSXML_JSON_ROOT }
 };
-static const char *ooxml_json_keys[] = {
-    "CoreProperties",
-    "Title",
-    "Subject",
-    "Author",
-    "Keywords",
-    "Comments",
-    "Description",
-    "LastAuthor",
-    "Revision",
-    "Created",
-    "Modified",
-    "Category",
-    "ContentStatus",
-
-    "ExtendedProperties",
-    "Application",
-    "AppVersion",
-    "Characters",
-    "CharactersWithSpaces",
-    "Company",
-    "DigSig",
-    "DocSecurity",
-    //"HeadingPairs",
-    "HiddenSlides",
-    "HLinks",
-    "HyperlinkBase",
-    "HyperlinksChanged",
-    "Lines",
-    "LinksUpToDate",
-    "Manager",
-    "MultimediaClips",
-    "Notes",
-    "Pages",
-    "Paragraphs",
-    "PresentationFormat",
-    "Properties",
-    "ScaleCrop",
-    "SharedDoc",
-    "Slides",
-    "Template",
-    //"TitlesOfParts",
-    "TotalTime",
-    "Words"
-};
-static size_t num_ooxml_keys = 40; //42
-
-static const char *ooxml_check_key(const char* key, size_t keylen)
-{
-    unsigned i;
-
-    if (keylen > OOXML_JSON_STRLEN_MAX-1) {
-        cli_dbgmsg("ooxml_check_key: key name too long\n");
-        return NULL;
-    }
-
-    for (i = 0; i < num_ooxml_keys; ++i) {
-        //cli_dbgmsg("%d %d %s %s %s %s\n", keylen, strlen(ooxml_keys[i]), key, keycmp, ooxml_keys[i], ooxml_json_keys[i]);
-        if (keylen == strlen(ooxml_keys[i]) && !strncasecmp(key, ooxml_keys[i], keylen)) {
-            return ooxml_json_keys[i];
-        }
-    }
-
-    return NULL;
-}
-
-static int ooxml_parse_element(cli_ctx *ctx, xmlTextReaderPtr reader, json_object *wrkptr, int rlvl, json_object *root)
-{
-    const char *element_tag = NULL, *end_tag = NULL;
-    const xmlChar *node_name = NULL, *node_value = NULL;
-    json_object *thisjobj = NULL;
-    int node_type, ret = CL_SUCCESS, endtag = 0, toval = 0, state = 1;
-
-    cli_dbgmsg("in ooxml_parse_element @ layer %d\n", rlvl);
-
-    /* check recursion level */
-    if (rlvl >= OOXML_JSON_RECLEVEL_MAX) {
-        cli_dbgmsg("ooxml_parse_element: reached ooxml json recursion limit\n");
-        cli_jsonbool(root, "HitRecursiveLimit", 1);
-        /* skip it */
-        state = xmlTextReaderNext(reader);
-        check_state(state);
-        return CL_SUCCESS;
-    }
-
-    /* acquire element type */
-    node_type = xmlTextReaderNodeType(reader);
-    if (node_type == -1)
-        return CL_EPARSE;
-
-    if (node_type != XML_READER_TYPE_ELEMENT) {
-        cli_dbgmsg("ooxml_parse_element: first node typed %d, not %d\n", node_type, XML_READER_TYPE_ELEMENT);
-        return CL_EFORMAT; /* first type is not an element */
-    }
-
-    node_name = xmlTextReaderConstLocalName(reader);
-    if (!node_name) {
-        cli_dbgmsg("ooxml_parse_element: element tag node nameless\n");
-        return CL_EPARSE; /* no name, nameless */
-    }
-    element_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
-    if (!element_tag) {
-        cli_dbgmsg("ooxml_parse_element: invalid element tag [%s]\n", node_name);
-        /* skip it */
-        state = xmlTextReaderNext(reader);
-        check_state(state);
-        return CL_SUCCESS;
-    }
-
-    /* generate json object */
-    thisjobj = cli_jsonobj(wrkptr, element_tag);
-    if (!thisjobj) {
-        return CL_EMEM;
-    }
-    cli_dbgmsg("ooxml_parse_element: generated json object [%s]\n", element_tag);
-
-    if (rlvl == 0)
-        root = thisjobj;
-
-    /* handle attributes */
-    state = xmlTextReaderHasAttributes(reader);
-    if (state == 1) {
-        json_object *attributes;
-
-        attributes = cli_jsonobj(thisjobj, "Attributes");
-        if (!attributes) {
-            return CL_EPARSE;
-        }
-        cli_dbgmsg("ooxml_parse_element: retrieved json object [Attributes]\n");
-
-        while (xmlTextReaderMoveToNextAttribute(reader) == 1) {
-            const xmlChar *name, *value;
-            name = xmlTextReaderConstLocalName(reader);
-            value = xmlTextReaderConstValue(reader);
-            if (name == NULL || value == NULL) continue;
-
-            cli_dbgmsg("%s: %s\n", name, value);
-
-            cli_jsonstr(attributes, name, (const char *)value);
-        }
-    }
-    else if (state == -1)
-        return CL_EPARSE;
-
-    state = xmlTextReaderIsEmptyElement(reader);
-    if (state == 1) {
-        state = xmlTextReaderNext(reader);
-        check_state(state);
-        return CL_SUCCESS;
-    }
-    else if (state == -1)
-        return CL_EPARSE;
-
-    /* advance to first content node */
-    state = xmlTextReaderRead(reader);
-    check_state(state);
-
-    /* parse until the end element tag */
-    while (!endtag) {
-        if (cli_json_timeout_cycle_check(ctx, &toval) != CL_SUCCESS) {
-            return CL_ETIMEOUT;
-        }
-
-        node_type = xmlTextReaderNodeType(reader);
-        if (node_type == -1)
-            return CL_EPARSE;
-
-        switch (node_type) {
-        case XML_READER_TYPE_ELEMENT:
-            ret = ooxml_parse_element(ctx, reader, thisjobj, rlvl+1, root);
-            if (ret != CL_SUCCESS) {
-                return ret;
-            }
-            break;
-
-        case XML_READER_TYPE_END_ELEMENT:
-            cli_dbgmsg("in ooxml_parse_element @ layer %d closed\n", rlvl);
-            node_name = xmlTextReaderConstLocalName(reader);
-            if (!node_name) {
-                cli_dbgmsg("ooxml_parse_element: element end tag node nameless\n");
-                return CL_EPARSE; /* no name, nameless */
-            }
-
-            end_tag = ooxml_check_key((const char *)node_name, xmlStrlen(node_name));
-            if (!end_tag) {
-                cli_dbgmsg("ooxml_parse_element: invalid element end tag [%s]\n", node_name);
-                return CL_EFORMAT; /* unrecognized element tag */
-            }
-            if (strncmp(element_tag, end_tag, strlen(element_tag))) {
-                cli_dbgmsg("ooxml_parse_element: element tag does not match end tag\n");
-                return CL_EFORMAT;
-            }
-
-            /* advance to next element tag */
-            state = xmlTextReaderRead(reader);
-            check_state(state);
-
-            endtag = 1;
-            break;
-
-        case XML_READER_TYPE_TEXT:
-            node_value = xmlTextReaderConstValue(reader);
-
-            ret = ooxml_parse_value(thisjobj, "Value", node_value);
-            if (ret != CL_SUCCESS)
-                return ret;
-
-            cli_dbgmsg("ooxml_parse_element: added json value [%s: %s]\n", element_tag, node_value);
-
-            /* advance to next element tag */
-            state = xmlTextReaderRead(reader);
-            check_state(state);
-
-            break;
-
-        default:
-#if OOXML_DEBUG
-            node_name = xmlTextReaderConstLocalName(reader);
-            node_value = xmlTextReaderConstValue(reader);
-
-            cli_dbgmsg("ooxml_parse_element: unhandled xml node %s [%d]: %s\n", node_name, node_type, node_value);
-#endif
-            state = xmlTextReaderNext(reader);
-            check_state(state);
-            return CL_SUCCESS;
-        }
-    }
-
-    return CL_SUCCESS;
-}
+static size_t num_ooxml_keys = sizeof(ooxml_keys) / sizeof(struct key_entry);

 static int ooxml_updatelimits(int fd, cli_ctx *ctx)
 {
@ -433,12 +126,7 @@ static int ooxml_parse_document(int fd, cli_ctx *ctx)
        return CL_SUCCESS; // internal error from libxml2
    }

-    /* move reader to first element */
-    if (xmlTextReaderRead(reader) != 1) {
-        return CL_SUCCESS; /* libxml2 failed */
-    }
-
-    ret = ooxml_parse_element(ctx, reader, ctx->wrkproperty, 0, NULL);
+    ret = cli_msxml_parse_document(ctx, reader, ooxml_keys, num_ooxml_keys, 1);

    if (ret != CL_SUCCESS && ret != CL_ETIMEOUT && ret != CL_BREAK)
        cli_warnmsg("ooxml_parse_document: encountered issue in parsing properties document\n");
@ -455,9 +143,9 @@ static int ooxml_core_cb(int fd, cli_ctx *ctx)
    cli_dbgmsg("in ooxml_core_cb\n");
    ret = ooxml_parse_document(fd, ctx);
    if (ret == CL_EPARSE)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_XMLPARSER");
    else if (ret == CL_EFORMAT)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_CORE_MALFORMED");

    return ret;
 }
@ -469,9 +157,9 @@ static int ooxml_extn_cb(int fd, cli_ctx *ctx)
    cli_dbgmsg("in ooxml_extn_cb\n");
    ret = ooxml_parse_document(fd, ctx);
    if (ret == CL_EPARSE)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_XMLPARSER");
    else if (ret == CL_EFORMAT)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EXTN_MALFORMED");

    return ret;
 }
@ -499,7 +187,7 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
    reader = xmlReaderForFd(fd, "[Content_Types].xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (reader == NULL) {
        cli_dbgmsg("ooxml_content_cb: xmlReaderForFd error for ""[Content_Types].xml""\n");
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_XML_READER_FD");

        ctx->scansize = sav_scansize;
        ctx->scannedfiles = sav_scannedfiles;
@ -608,37 +296,37 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
    if (core) {
        cli_jsonint(ctx->wrkproperty, "CorePropertiesFileCount", core);
        if (core > 1)
-            ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES");
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CORE_PROPFILES");
    }
    else if (!mcore)
        cli_dbgmsg("cli_process_ooxml: file does not contain core properties file\n");
    if (mcore) {
        cli_jsonint(ctx->wrkproperty, "CorePropertiesMissingFileCount", mcore);
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CORE_PROPFILES");
    }

    if (extn) {
        cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesFileCount", extn);
        if (extn > 1)
-            ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES");
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_EXTN_PROPFILES");
    }
    else if (!mextn)
        cli_dbgmsg("cli_process_ooxml: file does not contain extended properties file\n");
    if (mextn) {
        cli_jsonint(ctx->wrkproperty, "ExtendedPropertiesMissingFileCount", mextn);
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_EXTN_PROPFILES");
    }

    if (cust) {
        cli_jsonint(ctx->wrkproperty, "CustomPropertiesFileCount", cust);
        if (cust > 1)
-            ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES");
+            cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MULTIPLE_CUSTOM_PROPFILES");
    }
    else if (!mcust)
        cli_dbgmsg("cli_process_ooxml: file does not contain custom properties file\n");
    if (mcust) {
        cli_jsonint(ctx->wrkproperty, "CustomPropertiesMissingFileCount", mcust);
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_MISSING_CUST_PROPFILES");
    }

    if (dsig) {
@ -694,7 +382,7 @@ int cli_process_ooxml(cli_ctx *ctx)
    uint32_t loff = 0;
    int tmp = CL_SUCCESS;

-    cli_dbgmsg("in cli_processooxml\n");
+    cli_dbgmsg("in cli_process_ooxml\n");
    if (!ctx) {
        return CL_ENULLARG;
    }
@ -702,35 +390,35 @@ int cli_process_ooxml(cli_ctx *ctx)
    /* find "[Content Types].xml" */
    tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff);
    if (tmp == CL_ETIMEOUT) {
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
        return CL_ETIMEOUT;
    }
    else if (tmp != CL_VIRUS) {
        cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
        return CL_EFORMAT;
    }
    cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);

    tmp = unzip_single_internal(ctx, loff, ooxml_content_cb);
    if (tmp == CL_ETIMEOUT)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
    else if (tmp == CL_EMEM)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
    else if (tmp == CL_EMAXSIZE)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
    else if (tmp == CL_EMAXFILES)
-        ooxml_add_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
+        cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");

    return tmp;
 #else
    UNUSEDPARAM(ctx);
-    cli_dbgmsg("in cli_processooxml\n");
+    cli_dbgmsg("in cli_process_ooxml\n");
 #if !HAVE_LIBXML2
-    cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!");
+    cli_dbgmsg("cli_process_ooxml: libxml2 needs to enabled!\n");
 #endif
 #if !HAVE_JSON
-    cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!");
+    cli_dbgmsg("cli_process_ooxml: libjson needs to enabled!\n");
 #endif
    return CL_SUCCESS;
 #endif
--- a/libclamav/others.c
+++ b/libclamav/others.c
@ -770,7 +770,6 @@ struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine)
    settings->cb_hash = engine->cb_hash;
    settings->cb_meta = engine->cb_meta;
    settings->cb_file_props = engine->cb_file_props;
-    settings->cb_file_props_data = engine->cb_file_props_data;
    settings->engine_options = engine->engine_options;

    settings->cb_stats_add_sample = engine->cb_stats_add_sample;
@ -843,7 +842,6 @@ int cl_engine_settings_apply(struct cl_engine *engine, const struct cl_settings
    engine->cb_hash = settings->cb_hash;
    engine->cb_meta = settings->cb_meta;
    engine->cb_file_props = settings->cb_file_props;
-    engine->cb_file_props_data = settings->cb_file_props_data;

    engine->cb_stats_add_sample = settings->cb_stats_add_sample;
    engine->cb_stats_remove_sample = settings->cb_stats_remove_sample;
@ -1363,8 +1361,7 @@ void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback)
    engine->cb_meta = callback;
 }

- void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback, void * cbdata)
+void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback)
 {
    engine->cb_file_props = callback;
-    engine->cb_file_props_data = cbdata;
 }
--- a/libclamav/others.h
+++ b/libclamav/others.h
@ -67,7 +67,7 @@
 * in re-enabling affected modules.
 */

-#define CL_FLEVEL 80
+#define CL_FLEVEL 81
 #define CL_FLEVEL_DCONF	CL_FLEVEL
 #define CL_FLEVEL_SIGTOOL CL_FLEVEL

@ -315,7 +315,6 @@ struct cl_engine {
    clcb_hash cb_hash;
    clcb_meta cb_meta;
    clcb_file_props cb_file_props;
-    void *cb_file_props_data;

    /* Used for bytecode */
    struct cli_all_bc bcs;
@ -391,7 +390,6 @@ struct cl_settings {
    clcb_hash cb_hash;
    clcb_meta cb_meta;
    clcb_file_props cb_file_props;
-    void *cb_file_props_data;

    /* Engine max settings */
    uint64_t maxembeddedpe;  /* max size to scan MSEXE for PE */
--- a/libclamav/pdf.c
+++ b/libclamav/pdf.c
@ -59,6 +59,7 @@
 #include "arc4.h"
 #include "rijndael.h"
 #include "textnorm.h"
+#include "conv.h"
 #include "json_api.h"

 #ifdef	CL_DEBUG
@ -746,7 +747,7 @@ static void aes_decrypt(const unsigned char *in, off_t *length, unsigned char *q
 }


-static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
+char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method)
 {
    unsigned char *key, *q, result[16];
    unsigned n;
@ -846,7 +847,7 @@ static char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, of
    return (char *)q;
 }

-static enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
+enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj)
 {
    if (obj->flags & (1 << OBJ_EMBEDDED_FILE))
        return pdf->enc_method_embeddedfile;
@ -2244,7 +2245,7 @@ static enum enc_method parse_enc_method(const char *dict, unsigned len, const ch
    return ret;
 }

-static void pdf_handle_enc(struct pdf_struct *pdf)
+void pdf_handle_enc(struct pdf_struct *pdf)
 {
    struct pdf_obj *obj;
    uint32_t len, n, R, P, length, EM = 1, i, oulen;
@ -3214,8 +3215,12 @@ static void Author_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfnam
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.author))
-        pdf->stats.author = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL);
+    if (!(pdf->stats.author)) {
+        pdf->stats.author = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.author))
+            return;
+        pdf->stats.author->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Author", NULL, &(pdf->stats.author->meta));
+    }
 }
 #endif

@ -3230,8 +3235,12 @@ static void Creator_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.creator))
-        pdf->stats.creator = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL);
+    if (!(pdf->stats.creator)) {
+        pdf->stats.creator = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.creator))
+            return;
+        pdf->stats.creator->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Creator", NULL, &(pdf->stats.creator->meta));
+    }
 }
 #endif

@ -3246,8 +3255,12 @@ static void ModificationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, str
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.modificationdate))
-        pdf->stats.modificationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL);
+    if (!(pdf->stats.modificationdate)) {
+        pdf->stats.modificationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.modificationdate))
+            return;
+        pdf->stats.modificationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/ModDate", NULL, &(pdf->stats.modificationdate->meta));
+    }
 }
 #endif

@ -3262,8 +3275,12 @@ static void CreationDate_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.creationdate))
-        pdf->stats.creationdate = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL);
+    if (!(pdf->stats.creationdate)) {
+        pdf->stats.creationdate = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.creationdate))
+            return;
+        pdf->stats.creationdate->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/CreationDate", NULL, &(pdf->stats.creationdate->meta));
+    }
 }
 #endif

@ -3278,8 +3295,12 @@ static void Producer_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.producer))
-        pdf->stats.producer = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL);
+    if (!(pdf->stats.producer)) {
+        pdf->stats.producer = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.producer))
+            return;
+        pdf->stats.producer->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Producer", NULL, &(pdf->stats.producer->meta));
+    }
 }
 #endif

@ -3294,8 +3315,12 @@ static void Title_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfname
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.title))
-        pdf->stats.title = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL);
+    if (!(pdf->stats.title)) {
+        pdf->stats.title = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.title))
+            return;
+        pdf->stats.title->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Title", NULL, &(pdf->stats.title->meta));
+    }
 }
 #endif

@ -3310,8 +3335,12 @@ static void Keywords_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfn
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.keywords))
-        pdf->stats.keywords = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL);
+    if (!(pdf->stats.keywords)) {
+        pdf->stats.keywords = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.keywords))
+            return;
+        pdf->stats.keywords->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Keywords", NULL, &(pdf->stats.keywords->meta));
+    }
 }
 #endif

@ -3326,8 +3355,12 @@ static void Subject_cb(struct pdf_struct *pdf, struct pdf_obj *obj, struct pdfna
    if (!(pdf->ctx->options & CL_SCAN_FILE_PROPERTIES))
        return;

-    if (!(pdf->stats.subject))
-        pdf->stats.subject = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL);
+    if (!(pdf->stats.subject)) {
+        pdf->stats.subject = cli_calloc(1, sizeof(struct pdf_stats_entry));
+        if (!(pdf->stats.subject))
+            return;
+        pdf->stats.subject->data = pdf_parse_string(pdf, obj, obj->start + pdf->map, obj_size(pdf, obj, 1), "/Subject", NULL, &(pdf->stats.subject->meta));
+    }
 }
 #endif

@ -3511,22 +3544,182 @@ static void pdf_export_json(struct pdf_struct *pdf)
        goto cleanup;
    }

-    if (pdf->stats.author)
-        cli_jsonstr(pdfobj, "Author", pdf->stats.author);
-    if (pdf->stats.creator)
-        cli_jsonstr(pdfobj, "Creator", pdf->stats.creator);
-    if (pdf->stats.producer)
-        cli_jsonstr(pdfobj, "Producer", pdf->stats.producer);
-    if (pdf->stats.modificationdate)
-        cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate);
-    if (pdf->stats.creationdate)
-        cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate);
-    if (pdf->stats.title)
-        cli_jsonstr(pdfobj, "Title", pdf->stats.title);
-    if (pdf->stats.subject)
-        cli_jsonstr(pdfobj, "Subject", pdf->stats.subject);
-    if (pdf->stats.keywords)
-        cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords);
+    if (pdf->stats.author) {
+        if (!pdf->stats.author->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.author->meta.obj, pdf->stats.author->data, pdf->stats.author->meta.length);
+            if (out) {
+                free(pdf->stats.author->data);
+                pdf->stats.author->data = out;
+                pdf->stats.author->meta.length = strlen(out);
+                pdf->stats.author->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.author->meta.success && cli_isutf8(pdf->stats.author->data, pdf->stats.author->meta.length)) {
+            cli_jsonstr(pdfobj, "Author", pdf->stats.author->data);
+        } else if (pdf->stats.author->data && pdf->stats.author->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.author->data, pdf->stats.author->meta.length);
+            cli_jsonstr(pdfobj, "Author", b64);
+            cli_jsonbool(pdfobj, "Author_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "Author", "");
+        }
+    }
+    if (pdf->stats.creator) {
+        if (!pdf->stats.creator->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.creator->meta.obj, pdf->stats.creator->data, pdf->stats.creator->meta.length);
+            if (out) {
+                free(pdf->stats.creator->data);
+                pdf->stats.creator->data = out;
+                pdf->stats.creator->meta.length = strlen(out);
+                pdf->stats.creator->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.creator->meta.success && cli_isutf8(pdf->stats.creator->data, pdf->stats.creator->meta.length)) {
+            cli_jsonstr(pdfobj, "Creator", pdf->stats.creator->data);
+        } else if (pdf->stats.creator->data && pdf->stats.creator->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.creator->data, pdf->stats.creator->meta.length);
+            cli_jsonstr(pdfobj, "Creator", b64);
+            cli_jsonbool(pdfobj, "Creator_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "Creator", "");
+        }
+    }
+    if (pdf->stats.producer) {
+        if (!pdf->stats.producer->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.producer->meta.obj, pdf->stats.producer->data, pdf->stats.producer->meta.length);
+            if (out) {
+                free(pdf->stats.producer->data);
+                pdf->stats.producer->data = out;
+                pdf->stats.producer->meta.length = strlen(out);
+                pdf->stats.producer->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.producer->meta.success && cli_isutf8(pdf->stats.producer->data, pdf->stats.producer->meta.length)) {
+            cli_jsonstr(pdfobj, "Producer", pdf->stats.producer->data);
+        } else if (pdf->stats.producer->data && pdf->stats.producer->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.producer->data, pdf->stats.producer->meta.length);
+            cli_jsonstr(pdfobj, "Producer", b64);
+            cli_jsonbool(pdfobj, "Producer_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "Producer", "");
+        }
+    }
+    if (pdf->stats.modificationdate) {
+        if (!pdf->stats.modificationdate->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.modificationdate->meta.obj, pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
+            if (out) {
+                free(pdf->stats.modificationdate->data);
+                pdf->stats.modificationdate->data = out;
+                pdf->stats.modificationdate->meta.length = strlen(out);
+                pdf->stats.modificationdate->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.modificationdate->meta.success && cli_isutf8(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length)) {
+            cli_jsonstr(pdfobj, "ModificationDate", pdf->stats.modificationdate->data);
+        } else if (pdf->stats.modificationdate->data && pdf->stats.modificationdate->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.modificationdate->data, pdf->stats.modificationdate->meta.length);
+            cli_jsonstr(pdfobj, "ModificationDate", b64);
+            cli_jsonbool(pdfobj, "ModificationDate_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "ModificationDate", "");
+        }
+    }
+    if (pdf->stats.creationdate) {
+        if (!pdf->stats.creationdate->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.creationdate->meta.obj, pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
+            if (out) {
+                free(pdf->stats.creationdate->data);
+                pdf->stats.creationdate->data = out;
+                pdf->stats.creationdate->meta.length = strlen(out);
+                pdf->stats.creationdate->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.creationdate->meta.success && cli_isutf8(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length)) {
+            cli_jsonstr(pdfobj, "CreationDate", pdf->stats.creationdate->data);
+        } else if (pdf->stats.creationdate->data && pdf->stats.creationdate->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.creationdate->data, pdf->stats.creationdate->meta.length);
+            cli_jsonstr(pdfobj, "CreationDate", b64);
+            cli_jsonbool(pdfobj, "CreationDate_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "CreationDate", "");
+        }
+    }
+    if (pdf->stats.title) {
+        if (!pdf->stats.title->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.title->meta.obj, pdf->stats.title->data, pdf->stats.title->meta.length);
+            if (out) {
+                free(pdf->stats.title->data);
+                pdf->stats.title->data = out;
+                pdf->stats.title->meta.length = strlen(out);
+                pdf->stats.title->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.title->meta.success && cli_isutf8(pdf->stats.title->data, pdf->stats.title->meta.length)) {
+            cli_jsonstr(pdfobj, "Title", pdf->stats.title->data);
+        } else if (pdf->stats.title->data && pdf->stats.title->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.title->data, pdf->stats.title->meta.length);
+            cli_jsonstr(pdfobj, "Title", b64);
+            cli_jsonbool(pdfobj, "Title_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "Title", "");
+        }
+    }
+    if (pdf->stats.subject) {
+        if (!pdf->stats.subject->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.subject->meta.obj, pdf->stats.subject->data, pdf->stats.subject->meta.length);
+            if (out) {
+                free(pdf->stats.subject->data);
+                pdf->stats.subject->data = out;
+                pdf->stats.subject->meta.length = strlen(out);
+                pdf->stats.subject->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.subject->meta.success && cli_isutf8(pdf->stats.subject->data, pdf->stats.subject->meta.length)) {
+            cli_jsonstr(pdfobj, "Subject", pdf->stats.subject->data);
+        } else if (pdf->stats.subject->data && pdf->stats.subject->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.subject->data, pdf->stats.subject->meta.length);
+            cli_jsonstr(pdfobj, "Subject", b64);
+            cli_jsonbool(pdfobj, "Subject_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "Subject", "");
+        }
+    }
+    if (pdf->stats.keywords) {
+        if (!pdf->stats.keywords->meta.success) {
+            char *out = pdf_finalize_string(pdf, pdf->stats.keywords->meta.obj, pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
+            if (out) {
+                free(pdf->stats.keywords->data);
+                pdf->stats.keywords->data = out;
+                pdf->stats.keywords->meta.length = strlen(out);
+                pdf->stats.keywords->meta.success = 1;
+            }
+        }
+
+        if (pdf->stats.keywords->meta.success && cli_isutf8(pdf->stats.keywords->data, pdf->stats.keywords->meta.length)) {
+            cli_jsonstr(pdfobj, "Keywords", pdf->stats.keywords->data);
+        } else if (pdf->stats.keywords->data && pdf->stats.keywords->meta.length) {
+            char *b64 = cl_base64_encode(pdf->stats.keywords->data, pdf->stats.keywords->meta.length);
+            cli_jsonstr(pdfobj, "Keywords", b64);
+            cli_jsonbool(pdfobj, "Keywords_base64", 1);
+            free(b64);
+        } else {
+            cli_jsonstr(pdfobj, "Keywords", "");
+        }
+    }
    if (pdf->stats.ninvalidobjs)
        cli_jsonint(pdfobj, "InvalidObjectCount", pdf->stats.ninvalidobjs);
    if (pdf->stats.njs)
@ -3589,6 +3782,8 @@ static void pdf_export_json(struct pdf_struct *pdf)
        cli_jsonbool(pdfobj, "Encrypted", 1);
        if (pdf->flags & (1 << DECRYPTABLE_PDF))
            cli_jsonbool(pdfobj, "Decryptable", 1);
+        else
+            cli_jsonbool(pdfobj, "Decryptable", 0);
    }

    for (i=0; i < pdf->nobjs; i++) {
@ -3605,41 +3800,57 @@ static void pdf_export_json(struct pdf_struct *pdf)

 cleanup:
    if ((pdf->stats.author)) {
+        if (pdf->stats.author->data)
+            free(pdf->stats.author->data);
        free(pdf->stats.author);
        pdf->stats.author = NULL;
    }

    if (pdf->stats.creator) {
+        if (pdf->stats.creator->data)
+            free(pdf->stats.creator->data);
        free(pdf->stats.creator);
        pdf->stats.creator = NULL;
    }

    if (pdf->stats.producer) {
+        if (pdf->stats.producer->data)
+            free(pdf->stats.producer->data);
        free(pdf->stats.producer);
        pdf->stats.producer = NULL;
    }

    if (pdf->stats.modificationdate) {
+        if (pdf->stats.modificationdate->data)
+            free(pdf->stats.modificationdate->data);
        free(pdf->stats.modificationdate);
        pdf->stats.modificationdate = NULL;
    }

    if (pdf->stats.creationdate) {
+        if (pdf->stats.creationdate->data)
+            free(pdf->stats.creationdate->data);
        free(pdf->stats.creationdate);
        pdf->stats.creationdate = NULL;
    }

    if (pdf->stats.title) {
+        if (pdf->stats.title->data)
+            free(pdf->stats.title->data);
        free(pdf->stats.title);
        pdf->stats.title = NULL;
    }

    if (pdf->stats.subject) {
+        if (pdf->stats.subject->data)
+            free(pdf->stats.subject->data);
        free(pdf->stats.subject);
        pdf->stats.subject = NULL;
    }

    if (pdf->stats.keywords) {
+        if (pdf->stats.keywords->data)
+            free(pdf->stats.keywords->data);
        free(pdf->stats.keywords);
        pdf->stats.keywords = NULL;
    }
--- a/libclamav/pdf.h
+++ b/libclamav/pdf.h
@ -62,6 +62,17 @@ struct pdf_dict {
    struct pdf_dict_node *tail;
 };

+/* One document-information string (e.g. /Author, /Title) together with the
+ * bookkeeping recorded while it was parsed. */
+struct pdf_stats_entry {
+    char *data; /* string returned by pdf_parse_string(); freed in pdf_export_json() cleanup */
+
+    /* populated by pdf_parse_string */
+    struct pdf_stats_metadata {
+        int length; /* byte length of data as recorded by pdf_parse_string */
+        struct pdf_obj *obj; /* object that was being parsed when the string was found */
+        int success; /* if finalize succeeds */
+    } meta;
+};
+
 struct pdf_stats {
    int32_t ninvalidobjs;     /* Number of invalid objects */
    int32_t njs;              /* Number of javascript objects */
@ -88,14 +99,14 @@ struct pdf_stats {
    int32_t nrichmedia;       /* Number of RichMedia objects */
    int32_t nacroform;        /* Number of AcroForm objects */
    int32_t nxfa;             /* Number of XFA objects */
-    char *author;             /* Author of the PDF */
-    char *creator;            /* Application used to create the PDF */
-    char *producer;           /* Application used to produce the PDF */
-    char *creationdate;       /* Date the PDF was created */
-    char *modificationdate;   /* Date the PDF was modified */
-    char *title;              /* Title of the PDF */
-    char *subject;            /* Subject of the PDF */
-    char *keywords;           /* Keywords of the PDF */
+    struct pdf_stats_entry *author;             /* Author of the PDF */
+    struct pdf_stats_entry *creator;            /* Application used to create the PDF */
+    struct pdf_stats_entry *producer;           /* Application used to produce the PDF */
+    struct pdf_stats_entry *creationdate;       /* Date the PDF was created */
+    struct pdf_stats_entry *modificationdate;   /* Date the PDF was modified */
+    struct pdf_stats_entry *title;              /* Title of the PDF */
+    struct pdf_stats_entry *subject;            /* Subject of the PDF */
+    struct pdf_stats_entry *keywords;           /* Keywords of the PDF */
 };


@ -144,7 +155,12 @@ int pdf_extract_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t flags)
 int pdf_findobj(struct pdf_struct *pdf);
 struct pdf_obj *find_obj(struct pdf_struct *pdf, struct pdf_obj *obj, uint32_t objid);

-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar);
+void pdf_handle_enc(struct pdf_struct *pdf);
+char *decrypt_any(struct pdf_struct *pdf, uint32_t id, const char *in, off_t *length, enum enc_method enc_method);
+enum enc_method get_enc_method(struct pdf_struct *pdf, struct pdf_obj *obj);
+
+char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len);
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *stats);
 struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
 struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, size_t objsz, char *begin, char **endchar);
 int is_object_reference(char *begin, char **endchar, uint32_t *id);
--- a/libclamav/pdfng.c
+++ b/libclamav/pdfng.c
@ -68,82 +68,33 @@
 #include "rijndael.h"
 #include "textnorm.h"
 #include "json_api.h"
+#include "conv.h"

 char *pdf_convert_utf(char *begin, size_t sz);

 char *pdf_convert_utf(char *begin, size_t sz)
 {
    char *res=NULL;
+    char *buf, *outbuf;
 #if HAVE_ICONV
-    char *buf, *outbuf, *p1, *p2;
-    size_t sz2, inlen, outlen, i;
+    char *p1, *p2;
+    size_t inlen, outlen, i;
    char *encodings[] = {
        "UTF-16",
        NULL
    };
    iconv_t cd;
+#endif

-    buf = cli_calloc(1, sz);
+    buf = cli_calloc(1, sz+1);
    if (!(buf))
        return NULL;
+    memcpy(buf, begin, sz);

-    /* convert PDF specific escape sequences, like octal sequences */
-    sz2 = 0;
-    for (i = 0; i < sz; ++i) {
-        if ((i+1 < sz) && begin[i] == '\\') {
-            if ((i+3 < sz) &&
-                (isdigit(begin[i+1]) && isdigit(begin[i+2]) && isdigit(begin[i+3]))) {
-                /* octal sequence */
-                char octal[4], *check;
-                unsigned long value;
-
-                memcpy(octal, &begin[i+1], 3);
-                octal[3] = '\0';
-
-                value = (char)strtoul(octal, &check, 8);
-                /* check if all characters were converted */
-                if (check == &octal[3])
-                    buf[sz2++] = value;
-                i += 3;
-            } else {
-                /* other sequences */
-                switch(begin[i+1]) {
-                case 'n':
-                    buf[sz2++] = 0x0a;
-                    break;
-                case 'r':
-                    buf[sz2++] = 0x0d;
-                    break;
-                case 't':
-                    buf[sz2++] = 0x09;
-                    break;
-                case 'b':
-                    buf[sz2++] = 0x08;
-                    break;
-                case 'f':
-                    buf[sz2++] = 0x0c;
-                    break;
-                case '(':
-                    buf[sz2++] = 0x28;
-                    break;
-                case ')':
-                    buf[sz2++] = 0x29;
-                    break;
-                case '\\':
-                    buf[sz2++] = 0x5c;
-                    break;
-                default:
-                    /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
-                    break;
-                }
-            }
-        } else
-            buf[sz2++] = begin[i]; 
-    }
-    //memcpy(buf, begin, sz);
+#if HAVE_ICONV
    p1 = buf;

-    p2 = outbuf = cli_calloc(1, sz2+1);
+    p2 = outbuf = cli_calloc(1, sz+1);
    if (!(outbuf)) {
        free(buf);
        return NULL;
@ -152,7 +103,7 @@ char *pdf_convert_utf(char *begin, size_t sz)
    for (i=0; encodings[i] != NULL; i++) {
        p1 = buf;
        p2 = outbuf;
-        inlen = outlen = sz2;
+        inlen = outlen = sz;

        cd = iconv_open("UTF-8", encodings[i]);
        if (cd == (iconv_t)(-1)) {
@ -162,32 +113,31 @@ char *pdf_convert_utf(char *begin, size_t sz)

        iconv(cd, (char **)(&p1), &inlen, &p2, &outlen);

-        if (outlen == sz2) {
+        if (outlen == sz) {
            /* Decoding unsuccessful right from the start */
            iconv_close(cd);
            continue;
        }

-        outbuf[sz2 - outlen] = '\0';
+        outbuf[sz - outlen] = '\0';

        res = strdup(outbuf);
        iconv_close(cd);
        break;
    }
+#else
+    outbuf = cli_utf16_to_utf8(buf, sz, UTF16_BOM);
+    if (!outbuf) {
+        free(buf);
+        return NULL;
+    }

+    res = strdup(outbuf);
+#endif
    free(buf);
    free(outbuf);

    return res;
-#else
-    res = cli_calloc(sz+1, 1);
-    if ((res)) {
-        memcpy(res, begin, sz);
-        res[sz] = '\0';
-    }
-
-    return res;
-#endif
 }

 int is_object_reference(char *begin, char **endchar, uint32_t *id)
@ -274,13 +224,154 @@ int is_object_reference(char *begin, char **endchar, uint32_t *id)
    return 0;
 }

-char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar)
+/*
+ * Decrypt the `*length` bytes at `in`, which belong to `obj`.
+ *
+ * Returns whatever decrypt_any() returns when the PDF is flagged as
+ * decryptable, and NULL when it is not.
+ */
+static char *pdf_decrypt_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, off_t *length)
+{
+    enum enc_method method;
+
+    /* pdf_handle_enc() is not called here: it is handled only once in cli_pdf() */
+    if (!(pdf->flags & (1 << DECRYPTABLE_PDF)))
+        return NULL;
+
+    method = get_enc_method(pdf, obj);
+
+    return decrypt_any(pdf, obj->id, in, length, method);
+}
+
+/*
+ * Turn the raw bytes of a PDF string into its final form.
+ *
+ * Steps, in order:
+ *   1. resolve PDF escape sequences (\ddd octal, \n \r \t \b \f \( \) \\);
+ *   2. if the PDF is flagged as encrypted, decrypt the result;
+ *   3. if the result contains a NUL byte or a byte above 0x7f, treat it as
+ *      UTF-16 and convert it to UTF-8 with pdf_convert_utf().
+ *
+ * pdf - PDF state; its flags select whether decryption is attempted.
+ * obj - object the string belongs to (passed on to the decryption helper).
+ * in  - raw string bytes; need not be NUL-terminated and may contain NULs.
+ * len - number of bytes at `in`.
+ *
+ * Returns a newly allocated, NUL-terminated buffer that the caller must
+ * free(), or NULL when `in` is NULL, an allocation fails, decryption fails
+ * or the UTF conversion fails.
+ */
+char *pdf_finalize_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *in, size_t len)
+{
+    char *wrkstr, *output = NULL;
+    size_t wrklen = len, outlen, i;
+    unsigned int likelyutf = 0;
+
+    if (!in)
+        return NULL;
+
+    /* get a working copy */
+    wrkstr = cli_calloc(len+1, sizeof(char));
+    if (!wrkstr)
+        return NULL;
+    memcpy(wrkstr, in, len);
+
+    /* convert PDF specific escape sequences, like octal sequences */
+    /* the search is length-based (not strchr) so that a backslash located
+     * after an embedded NUL byte is still noticed */
+    if (memchr(wrkstr, '\\', wrklen)) {
+        output = cli_calloc(wrklen+1, sizeof(char));
+        if (!output) {
+            free(wrkstr);
+            return NULL;
+        }
+
+        outlen = 0;
+        for (i = 0; i < wrklen; ++i) {
+            if ((i+1 < wrklen) && wrkstr[i] == '\\') {
+                /* cast to unsigned char: isdigit() is undefined for negative values */
+                if ((i+3 < wrklen) &&
+                    (isdigit((unsigned char)wrkstr[i+1]) &&
+                     isdigit((unsigned char)wrkstr[i+2]) &&
+                     isdigit((unsigned char)wrkstr[i+3]))) {
+                    /* octal sequence */
+                    char octal[4], *check;
+                    unsigned long value;
+
+                    memcpy(octal, &wrkstr[i+1], 3);
+                    octal[3] = '\0';
+
+                    value = (char)strtoul(octal, &check, 8);
+                    /* check if all characters were converted */
+                    if (check == &octal[3])
+                        output[outlen++] = value;
+                    i += 3; /* 4 with for loop [\ddd] */
+                } else {
+                    /* other sequences */
+                    switch(wrkstr[i+1]) {
+                    case 'n':
+                        output[outlen++] = 0x0a;
+                        break;
+                    case 'r':
+                        output[outlen++] = 0x0d;
+                        break;
+                    case 't':
+                        output[outlen++] = 0x09;
+                        break;
+                    case 'b':
+                        output[outlen++] = 0x08;
+                        break;
+                    case 'f':
+                        output[outlen++] = 0x0c;
+                        break;
+                    case '(':
+                        output[outlen++] = 0x28;
+                        break;
+                    case ')':
+                        output[outlen++] = 0x29;
+                        break;
+                    case '\\':
+                        output[outlen++] = 0x5c;
+                        break;
+                    default:
+                        /* IGNORE THE REVERSE SOLIDUS - PDF3000-2008 */
+                        break;
+                    }
+                    i += 1; /* 2 with for loop [\c] */
+                }
+            } else {
+                output[outlen++] = wrkstr[i];
+            }
+        }
+
+        /* Adopt the unescaped buffer directly.  It was zero-filled with
+         * wrklen+1 bytes and outlen <= wrklen, so it is NUL-terminated.
+         * Duplicating it with a string copy would stop at the first embedded
+         * NUL while wrklen kept the full length, making the later stages
+         * read past the end of the allocation. */
+        free(wrkstr);
+        wrkstr = output;
+        output = NULL;
+        wrklen = outlen;
+    }
+
+    /* check for encryption and decrypt */
+    if (pdf->flags & (1 << ENCRYPTED_PDF))
+    {
+        off_t tmpsz = (off_t)wrklen;
+        output = pdf_decrypt_string(pdf, obj, wrkstr, &tmpsz);
+        outlen = (size_t)tmpsz;
+        free(wrkstr);
+        if (output) {
+            /* copy into a buffer with one spare zero byte so the result is terminated */
+            wrkstr = cli_calloc(outlen+1, sizeof(char));
+            if (!wrkstr) {
+                free(output);
+                return NULL;
+            }
+            memcpy(wrkstr, output, outlen);
+            free(output);
+            wrklen = outlen;
+        } else {
+            return NULL;
+        }
+    }
+
+    /* check for UTF-* and convert to UTF-8 */
+    for (i = 0; i < wrklen; ++i) {
+        if (((unsigned char)wrkstr[i] > (unsigned char)0x7f) || (wrkstr[i] == '\0')) {
+            likelyutf = 1;
+            break;
+        }
+    }
+
+    if (likelyutf) {
+        /* a failed conversion yields NULL, which is passed on to the caller */
+        output = pdf_convert_utf(wrkstr, wrklen);
+        free(wrkstr);
+        wrkstr = output;
+    }
+
+    return wrkstr;
+}
+
+char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *objstart, size_t objsize, const char *str, char **endchar, struct pdf_stats_metadata *meta)
 {
    const char *q = objstart;
    char *p1, *p2;
    size_t len, checklen;
-    char *res;
-    int likelyutf = 0;
+    char *res = NULL;
    uint32_t objid;
    size_t i;

@ -294,8 +385,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
     * Fourth, Attempt to decode from UTF-* to UTF-8
     */

-    res = NULL;
-
    if (str) {
        checklen = strlen(str);

@ -407,26 +496,29 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
            switch (*p3) {
                case '(':
                case '<':
-                    res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL);
-                    free(begin);
+                    res = pdf_parse_string(pdf, obj, p3, objsize2, NULL, NULL, meta);
                    break;
                default:
-                    for (i=0; i < objsize2; i++) {
-                        if (p3[i] >= 0x7f) {
-                            likelyutf=1;
-                            break;
-                        }
-                    }
-
-                    res = likelyutf ? pdf_convert_utf(p3, objsize2) : NULL;
-
-                    if (!(res)) {
-                        res = begin;
+                    res = pdf_finalize_string(pdf, obj, begin, objsize2);
+                    if (!res) {
+                        res = cli_calloc(1, objsize2+1);
+                        if (!(res))
+                            return NULL;
+                        memcpy(res, begin, objsize2);
                        res[objsize2] = '\0';
-                    } else {
-                        free(begin);
+
+                        if (meta) {
+                            meta->length = objsize2;
+                            meta->obj = obj;
+                            meta->success = 0;
+                        }
+                    } else if (meta) {
+                        meta->length = strlen(res);
+                        meta->obj = obj;
+                        meta->success = 1;
                    }
            }
+            free(begin);
        }

        close(fd);
@ -471,9 +563,6 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *
    while (p2 < objstart + objsize) {
        int shouldbreak=0;

-        if (!likelyutf && (*((unsigned char *)p2) > (unsigned char)0x7f || *p2 == '\0'))
-            likelyutf = 1;
-
        switch (*p2) {
            case '\\':
                p2++;
@ -496,22 +585,25 @@ char *pdf_parse_string(struct pdf_struct *pdf, struct pdf_obj *obj, const char *

    len = (size_t)(p2 - p1) + 1;

-    if (likelyutf == 0) {
-        /* We're not UTF-*, so just make a copy of the string and return that */
+    res = pdf_finalize_string(pdf, obj, p1, len);
+    if (!res) {
        res = cli_calloc(1, len+1);
        if (!(res))
            return NULL;
-
        memcpy(res, p1, len);
        res[len] = '\0';
-        if (endchar)
-            *endchar = p2;

-        return res;
+        if (meta) {
+            meta->length = len;
+            meta->obj = obj;
+            meta->success = 0;
+        }
+    } else if (meta) {
+        meta->length = strlen(res);
+        meta->obj = obj;
+        meta->success = 1;
    }

-    res = pdf_convert_utf(p1, len);
-
    if (res && endchar)
        *endchar = p2;

@ -672,7 +764,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz

        switch (begin[0]) {
            case '(':
-                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
+                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
                begin = p1+2;
                break;
            case '[':
@ -688,7 +780,7 @@ struct pdf_dict *pdf_parse_dict(struct pdf_struct *pdf, struct pdf_obj *obj, siz
                    }
                }

-                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1);
+                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &p1, NULL);
                begin = p1+2;
                break;
            default:
@ -870,7 +962,7 @@ struct pdf_array *pdf_parse_array(struct pdf_struct *pdf, struct pdf_obj *obj, s

                /* Not a dictionary. Intentionally fall through. */
            case '(':
-                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin);
+                val = pdf_parse_string(pdf, obj, begin, objsz, NULL, &begin, NULL);
                begin += 2;
                break;
            case '[':
--- a/libclamav/pe.c
+++ b/libclamav/pe.c
@ -1642,7 +1642,7 @@ int cli_scanpe(cli_ctx *ctx)

        if(exe_sections[0].rsz > CLI_MAX_ALLOCATION)
            break;
-        if(!exe_sections[0].rsz)
+        if(exe_sections[0].rsz < 5)
            break;
        if(!(code=fmap_need_off_once(map, exe_sections[0].raw, exe_sections[0].rsz)))
            break;
@ -2457,7 +2457,21 @@ int cli_scanpe(cli_ctx *ctx)

            for(i = 0 ; i < nsections; i++) {
                if(exe_sections[i].raw) {
-                    if(!exe_sections[i].rsz || (unsigned int)fmap_readn(map, dest + exe_sections[i].rva - min, exe_sections[i].raw, exe_sections[i].ursz) != exe_sections[i].ursz) {
+			unsigned int r_ret;
+
+			if (!exe_sections[i].rsz)
+				goto out_no_petite;
+
+			if (!CLI_ISCONTAINED(dest, dsize,
+					     dest + exe_sections[i].rva - min,
+					     exe_sections[i].ursz))
+				goto out_no_petite;
+
+			r_ret = fmap_readn(map, dest + exe_sections[i].rva - min,
+					exe_sections[i].raw,
+					exe_sections[i].ursz);
+		    if (r_ret != exe_sections[i].ursz) {
+out_no_petite:
                        free(exe_sections);
                        free(dest);
                        return CL_CLEAN;
--- a/libclamav/petite.c
+++ b/libclamav/petite.c
@ -393,6 +393,11 @@ int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli
 	      free(usects);
 	      return 1;
 	    }
+	    if (backbytes >= INT_MAX / 2) {
+		    free(usects);
+		    cli_dbgmsg("Petite: probably invalid file\n");
+		    return 1;
+	    }
 	    backbytes = backbytes*2 + oob;
 	    if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
 	      free(usects);
@ -409,6 +414,11 @@ int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli
 		free(usects);
 		return 1;
 	      }
+	      if (backbytes >= INT_MAX / 2) {
+		      free(usects);
+		      cli_dbgmsg("Petite: probably invalid file\n");
+		      return 1;
+	      }
 	      backbytes = backbytes*2 + oob;
 	      backsize--;
 	    } while (backsize);
--- a/libclamav/rebuildpe.c
+++ b/libclamav/rebuildpe.c
@ -146,63 +146,63 @@ int cli_rebuildpe_align(char *buffer, struct cli_exe_section *sections, int sect
  if(datasize > CLI_MAX_ALLOCATION)
    return 0;

-  if((pefile = (char *) cli_calloc(rawbase+datasize, 1))) {
-    memcpy(pefile, HEADERS, 0x148);
+  pefile = (char *) cli_calloc(rawbase+datasize, 1);
+  if(!pefile)
+      return 0;

-    datasize = PESALIGN(rawbase, 0x1000);
+  memcpy(pefile, HEADERS, 0x148);

-    fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
-    fakepe->NumberOfSections = EC16(sects+gotghost);
-    fakepe->AddressOfEntryPoint = EC32(ep);
-    fakepe->ImageBase = EC32(base);
-    fakepe->SizeOfHeaders = EC32(rawbase);
-    memset(pefile+0x148, 0, 0x80);
-    cli_writeint32(pefile+0x148+0x10, ResRva);
-    cli_writeint32(pefile+0x148+0x14, ResSize);
-    curpe = pefile+0x148+0x80;
+  datasize = PESALIGN(rawbase, 0x1000);

-    if (gotghost) {
+  fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
+  fakepe->NumberOfSections = EC16(sects+gotghost);
+  fakepe->AddressOfEntryPoint = EC32(ep);
+  fakepe->ImageBase = EC32(base);
+  fakepe->SizeOfHeaders = EC32(rawbase);
+  memset(pefile+0x148, 0, 0x80);
+  cli_writeint32(pefile+0x148+0x10, ResRva);
+  cli_writeint32(pefile+0x148+0x14, ResSize);
+  curpe = pefile+0x148+0x80;
+
+  if (gotghost) {
      snprintf(curpe, 8, "empty");
      cli_writeint32(curpe+8, sections[0].rva-datasize); /* vsize */
      cli_writeint32(curpe+12, datasize); /* rva */
      cli_writeint32(curpe+0x24, 0xffffffff);
      curpe+=40;
      datasize+=PESALIGN(sections[0].rva-datasize, 0x1000);
-    }
+  }

-    for (i=0; i < sects; i++) {
+  for (i=0; i < sects; i++) {
      snprintf(curpe, 8, ".clam%.2d", i+1);
      if (!align) {
-        cli_writeint32(curpe+8, sections[i].vsz);
-        cli_writeint32(curpe+12, sections[i].rva);
-        cli_writeint32(curpe+16, sections[i].rsz);
-        cli_writeint32(curpe+20, rawbase);
+          cli_writeint32(curpe+8, sections[i].vsz);
+          cli_writeint32(curpe+12, sections[i].rva);
+          cli_writeint32(curpe+16, sections[i].rsz);
+          cli_writeint32(curpe+20, rawbase);
      } else {
-        cli_writeint32(curpe+8, PESALIGN(sections[i].vsz, align));
-        cli_writeint32(curpe+12, PESALIGN(sections[i].rva, align));
-        cli_writeint32(curpe+16, PESALIGN(sections[i].rsz, align));
-        cli_writeint32(curpe+20, rawbase);
+          cli_writeint32(curpe+8, PESALIGN(sections[i].vsz, align));
+          cli_writeint32(curpe+12, PESALIGN(sections[i].rva, align));
+          cli_writeint32(curpe+16, PESALIGN(sections[i].rsz, align));
+          cli_writeint32(curpe+20, rawbase);
      }
      /* already zeroed
-      cli_writeint32(curpe+24, 0);
-      cli_writeint32(curpe+28, 0);
-      cli_writeint32(curpe+32, 0);
+         cli_writeint32(curpe+24, 0);
+         cli_writeint32(curpe+28, 0);
+         cli_writeint32(curpe+32, 0);
      */
      cli_writeint32(curpe+0x24, 0xffffffff);
      memcpy(pefile+rawbase, buffer+sections[i].raw, sections[i].rsz);
      curpe+=40;
      if (!align) {
-        rawbase+=PESALIGN(sections[i].rsz, 0x200);
-        datasize+=PESALIGN(sections[i].vsz, 0x1000);
+          rawbase+=PESALIGN(sections[i].rsz, 0x200);
+          datasize+=PESALIGN(sections[i].vsz, 0x1000);
      } else {
-        rawbase+=PESALIGN(PESALIGN(sections[i].rsz, align), 0x200);
-        datasize+=PESALIGN(PESALIGN(sections[i].vsz, align), 0x1000);
+          rawbase+=PESALIGN(PESALIGN(sections[i].rsz, align), 0x200);
+          datasize+=PESALIGN(PESALIGN(sections[i].vsz, align), 0x1000);
      }
-    }
-    fakepe->SizeOfImage = EC32(datasize);
-  } else {
-    return 0;
  }
+  fakepe->SizeOfImage = EC32(datasize);

  i = (cli_writen(file, pefile, rawbase)!=-1);
  free(pefile);
--- a/libclamav/scanners.c
+++ b/libclamav/scanners.c
@ -105,6 +105,7 @@
 #include "ooxml.h"
 #include "xdp.h"
 #include "json_api.h"
+#include "msxml.h"

 #ifdef HAVE_BZLIB_H
 #include <bzlib.h>
@ -2212,6 +2213,12 @@ static int cli_scanraw(cli_ctx *ctx, cli_file_t type, uint8_t typercg, cli_file_
                case CL_TYPE_XDP:
                    ret = cli_scanxdp(ctx);
                    break;
+                case CL_TYPE_XML_WORD:
+                    ret = cli_scanmsxml(ctx);
+                    break;
+                case CL_TYPE_XML_XL:
+                    ret = cli_scanmsxml(ctx);
+                    break;
                case CL_TYPE_RARSFX:
                    if(type != CL_TYPE_RAR && have_rar && SCAN_ARCHIVE && (DCONF_ARCH & ARCH_CONF_RAR)) {
                        char *tmpname = NULL;
@ -2602,7 +2609,9 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
                //type == CL_TYPE_ZIP ||
                type == CL_TYPE_OOXML_WORD ||
                type == CL_TYPE_OOXML_PPT ||
-                type == CL_TYPE_OOXML_XL) { 
+                type == CL_TYPE_OOXML_XL ||
+                type == CL_TYPE_XML_WORD ||
+                type == CL_TYPE_XML_XL) {
                ctx->properties = json_object_new_object();
                if (NULL == ctx->properties) {
                    cli_errmsg("magic_scandesc: no memory for json properties object\n");
@ -2750,6 +2759,14 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
 	case CL_TYPE_IGNORED:
 	    break;

+    case CL_TYPE_XML_WORD:
+        ret = cli_scanmsxml(ctx);
+        break;
+
+    case CL_TYPE_XML_XL:
+        ret = cli_scanmsxml(ctx);
+        break;
+
    case CL_TYPE_XDP:
        ret = cli_scanxdp(ctx);
        break;
@ -3467,15 +3484,46 @@ static int scan_common(int desc, cl_fmap_t *map, const char **virname, unsigned
            int ret = CL_SUCCESS;
            cli_dbgmsg("%s\n", jstring);

-           /* Scan the json string unless a virus was detected */
            if (rc != CL_VIRUS) {
-                ctx.options &= ~CL_SCAN_FILE_PROPERTIES;
-                rc = cli_mem_scandesc(jstring, strlen(jstring), &ctx);
+                /* run bytecode preclass hook; generate fmap if needed for running hook */
+                struct cli_bc_ctx *bc_ctx = cli_bytecode_context_alloc();
+                if (!bc_ctx) {
+                    cli_errmsg("scan_common: can't allocate memory for bc_ctx\n");
+                    rc = CL_EMEM;
+                }
+                else {
+                    fmap_t *pc_map = map;
+
+                    if (!pc_map) {
+                        perf_start(&ctx, PERFT_MAP);
+                        if(!(pc_map = fmap(desc, 0, sb.st_size))) {
+                            perf_stop(&ctx, PERFT_MAP);
+                            rc = CL_EMEM;
+                        }
+                        perf_stop(&ctx, PERFT_MAP);
+                    }
+
+                    if (pc_map) {
+                        cli_bytecode_context_setctx(bc_ctx, &ctx);
+                        rc = cli_bytecode_runhook(&ctx, ctx.engine, bc_ctx, BC_PRECLASS, pc_map);
+                        cli_bytecode_context_destroy(bc_ctx);
+
+                        if (!map)
+                            funmap(pc_map);
+                    }
+                }
+
+                /* backwards compatibility: scan the json string unless a virus was detected */
+                if (rc != CL_VIRUS && ctx.engine->root[13]->ac_lsigs) {
+                    cli_warnmsg("scan_common: running depeciated preclass bytecodes for target type 13\n");
+                    ctx.options &= ~CL_SCAN_FILE_PROPERTIES;
+                    rc = cli_mem_scandesc(jstring, strlen(jstring), &ctx);
+                }
            }

            /* Invoke file props callback */
            if (ctx.engine->cb_file_props != NULL) {
-                ret = ctx.engine->cb_file_props(jstring, rc, ctx.engine->cb_file_props_data);
+                ret = ctx.engine->cb_file_props(jstring, rc, ctx.cb_ctx);
                if (ret != CL_SUCCESS)
                    rc = ret;
            }
--- a/libclamav/str.c
+++ b/libclamav/str.c
@ -690,3 +690,49 @@ char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type)
    s2[j] = '\0';
    return s2;
 }
+/*
+ * Check whether buf[0..len) is structurally well-formed UTF-8.
+ *
+ * Each byte with the high bit clear is accepted as plain ASCII. Any other
+ * byte must be a lead byte (110xxxxx through 1111110x) followed by exactly
+ * the number of 10xxxxxx continuation bytes that the lead byte announces
+ * (1 to 5).
+ *
+ * Only the bit patterns are checked: the code point value is never
+ * assembled (see the commented-out "c = ..." lines), so overlong forms and
+ * out-of-range code points are not rejected here.
+ *
+ * buf: bytes to examine; it is indexed up to len-1 and is not required to
+ *      be NUL-terminated.
+ * len: number of bytes in buf.
+ *
+ * Returns 1 if every byte fits the pattern (including when len is 0),
+ * 0 on the first violation or if a multi-byte sequence is cut short by
+ * the end of the buffer.
+ */
+int cli_isutf8(const char *buf, unsigned int len)
+{
+	unsigned int i, j;
+
+    for(i = 0; i < len; i++) {
+        if((buf[i] & 0x80) == 0) {  /* 0xxxxxxx is plain ASCII */
+            continue;
+        } else if((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
+            return 0;
+        } else {
+            unsigned int following; /* continuation bytes announced by the lead byte */
+
+            if((buf[i] & 0x20) == 0) {		/* 110xxxxx */
+                /* c = buf[i] & 0x1f; */
+                following = 1;
+            } else if((buf[i] & 0x10) == 0) {	/* 1110xxxx */
+                /* c = buf[i] & 0x0f; */
+                following = 2;
+            } else if((buf[i] & 0x08) == 0) {	/* 11110xxx */
+                /* c = buf[i] & 0x07; */
+                following = 3;
+            } else if((buf[i] & 0x04) == 0) {	/* 111110xx */
+                /* c = buf[i] & 0x03; */
+                following = 4;
+            } else if((buf[i] & 0x02) == 0) {	/* 1111110x */
+                /* c = buf[i] & 0x01; */
+                following = 5;
+            } else { /* 1111111x (0xFE / 0xFF) is not a valid lead byte */
+                return 0;
+            }
+
+            for(j = 0; j < following; j++) {
+                if(++i >= len) /* sequence truncated by the end of the buffer */
+                    return 0;
+
+                if((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) /* must be 10xxxxxx */
+                    return 0;
+
+                /* c = (c << 6) + (buf[i] & 0x3f); */
+            }
+        }
+    }
+
+    return 1;
+}
--- a/libclamav/str.h
+++ b/libclamav/str.h
@ -64,5 +64,7 @@ typedef enum {
 } utf16_type;
 char *cli_utf16_to_utf8(const char *utf16, size_t length, utf16_type type);

+int cli_isutf8(const char *buf, unsigned int len);
+
 size_t cli_strlcat(char *dst, const char *src, size_t sz); /* libclamav/strlcat.c */
 #endif
--- a/libclamav/swf.c
+++ b/libclamav/swf.c
@ -39,7 +39,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <sys/stat.h>
-#ifdef	HAVE_UNISTD_H
+#ifdef        HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #include <time.h>
@ -49,68 +49,69 @@
 #include "swf.h"
 #include "clamav.h"
 #include "scanners.h"
+#include "lzma_iface.h"

-#define EC16(v)	le16_to_host(v)
-#define EC32(v)	le32_to_host(v)
+#define EC16(v)        le16_to_host(v)
+#define EC32(v)        le32_to_host(v)

-#define INITBITS								\
-{										\
-    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
-	bitpos = 8;								\
-	bitbuf = (unsigned int) get_c;						\
-	offset += sizeof(get_c);						\
-    } else {									\
-	cli_warnmsg("cli_scanswf: INITBITS: Can't read file or file truncated\n");	\
-	return CL_EFORMAT;							\
-    }										\
+#define INITBITS                                                                \
+{                                                                               \
+    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {       \
+        bitpos = 8;                                                             \
+        bitbuf = (unsigned int) get_c;                                          \
+        offset += sizeof(get_c);                                                \
+    } else {                                                                    \
+        cli_warnmsg("cli_scanswf: INITBITS: Can't read file or file truncated\n"); \
+        return CL_EFORMAT;                                                      \
+    }                                                                           \
 }

-#define GETBITS(v, n)								\
-{										\
-    getbits_n = n;								\
-    bits = 0;									\
-    while(getbits_n > bitpos) {							\
-	getbits_n -= bitpos;							\
-	bits |= bitbuf << getbits_n;						\
-	if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
-	    bitbuf = (unsigned int) get_c;					\
-	    bitpos = 8;								\
-	    offset += sizeof(get_c);						\
-	} else {								\
-	    cli_warnmsg("cli_scanswf: GETBITS: Can't read file or file truncated\n");	\
-	    return CL_EFORMAT;							\
-	}									\
-    }										\
-    bitpos -= getbits_n;							\
-    bits |= bitbuf >> bitpos;							\
-    bitbuf &= 0xff >> (8 - bitpos);						\
-    v = bits & 0xffff;								\
+#define GETBITS(v, n)                                                           \
+{                                                                               \
+    getbits_n = n;                                                              \
+    bits = 0;                                                                   \
+    while(getbits_n > bitpos) {                                                 \
+        getbits_n -= bitpos;                                                    \
+        bits |= bitbuf << getbits_n;                                            \
+        if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {   \
+            bitbuf = (unsigned int) get_c;                                      \
+            bitpos = 8;                                                         \
+            offset += sizeof(get_c);                                            \
+        } else {                                                                \
+            cli_warnmsg("cli_scanswf: GETBITS: Can't read file or file truncated\n"); \
+            return CL_EFORMAT;                                                  \
+        }                                                                       \
+    }                                                                           \
+    bitpos -= getbits_n;                                                        \
+    bits |= bitbuf >> bitpos;                                                   \
+    bitbuf &= 0xff >> (8 - bitpos);                                             \
+    v = bits & 0xffff;                                                          \
 }

-#define GETWORD(v)								\
-{										\
-    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
-	getword_1 = (unsigned int) get_c;					\
-	offset += sizeof(get_c);						\
-    } else {									\
-	cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n");	\
-	return CL_EFORMAT;							\
-    }										\
-    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {	\
-	getword_2 = (unsigned int) get_c;					\
-	offset += sizeof(get_c);						\
-    } else {									\
-	cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n");	\
-	return CL_EFORMAT;							\
-    }										\
-    v = (uint16_t)(getword_1 & 0xff) | ((getword_2 & 0xff) << 8);		\
+#define GETWORD(v)                                                              \
+{                                                                               \
+    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {       \
+        getword_1 = (unsigned int) get_c;                                       \
+        offset += sizeof(get_c);                                                \
+    } else {                                                                    \
+        cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
+        return CL_EFORMAT;                                                      \
+    }                                                                           \
+    if(fmap_readn(map, &get_c, offset, sizeof(get_c)) == sizeof(get_c)) {       \
+        getword_2 = (unsigned int) get_c;                                       \
+        offset += sizeof(get_c);                                                \
+    } else {                                                                    \
+        cli_warnmsg("cli_scanswf: GETWORD: Can't read file or file truncated\n"); \
+        return CL_EFORMAT;                                                      \
+    }                                                                           \
+    v = (uint16_t)(getword_1 & 0xff) | ((getword_2 & 0xff) << 8);               \
 }

-#define GETDWORD(v)								\
-{										\
-    GETWORD(getdword_1);							\
-    GETWORD(getdword_2);							\
-    v = (uint32_t)(getdword_1 | (getdword_2 << 16));				\
+#define GETDWORD(v)                                                             \
+{                                                                               \
+    GETWORD(getdword_1);                                                        \
+    GETWORD(getdword_2);                                                        \
+    v = (uint32_t)(getdword_1 | (getdword_2 << 16));                            \
 }

 struct swf_file_hdr {
@ -119,30 +120,200 @@ struct swf_file_hdr {
    uint32_t filesize;
 };

-static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
+/*
+ * Decompress an LZMA-compressed SWF file into a temporary file and scan
+ * the decompressed result.
+ *
+ * The 8-byte SWF header in *hdr is written to the temporary file first,
+ * with signature[0] overwritten by 'F' (note: this modifies the caller's
+ * header). The 4-byte declared compressed size that follows the header in
+ * the map is read and only compared/logged; the remaining bytes are fed
+ * to the LZMA decoder in FILEBUFF-sized chunks and the output is appended
+ * to the temporary file, subject to cli_checklimits().
+ *
+ * ctx: scan context; supplies the fmap, engine settings and limits.
+ * hdr: SWF file header already read by the caller.
+ *
+ * Returns the result of cli_magic_scandesc() on the temporary file, or
+ * one of CL_EREAD / CL_EWRITE / CL_EUNPACK / CL_EFORMAT / CL_EUNLINK (or
+ * the cli_gentempfd() error) on failure. On every error path taken after
+ * the temporary file has been created, the descriptor is closed, the file
+ * is unlinked and its name is freed. After a completed scan the file is
+ * kept only when engine->keeptmp is set.
+ */
+static int scanzws(cli_ctx *ctx, struct swf_file_hdr *hdr)
 {
-	z_stream stream;
-	char inbuff[FILEBUFF], outbuff[FILEBUFF];
-	fmap_t *map = *ctx->fmap;
-	int offset = 8, ret, zret, outsize = 8, count, zend;
-	char *tmpname;
-	int fd;
+        struct CLI_LZMA lz;
+        unsigned char inbuff[FILEBUFF], outbuff[FILEBUFF];
+        fmap_t *map = *ctx->fmap;
+        /* strip off header */
+        off_t offset = 8;
+        uint32_t d_insize;
+        size_t outsize = 8;
+        int ret, lret, count;
+        char *tmpname;
+        int fd;

    if((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
-	cli_errmsg("scancws: Can't generate temporary file\n");
-	return ret;
+        cli_errmsg("scanzws: Can't generate temporary file\n");
+        return ret;
    }

    hdr->signature[0] = 'F';
    if(cli_writen(fd, hdr, sizeof(struct swf_file_hdr)) != sizeof(struct swf_file_hdr)) {
-	cli_errmsg("scancws: Can't write to file %s\n", tmpname);
+        cli_errmsg("scanzws: Can't write to file %s\n", tmpname);
        close(fd);
-	if(cli_unlink(tmpname)) {
-	    free(tmpname);
-	    return CL_EUNLINK;
-	}
-	free(tmpname);
-	return CL_EWRITE;
+        if(cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EWRITE;
+    }
+
+    /* read 4 bytes (for compressed 32-bit filesize) [not used for LZMA] */
+    if (fmap_readn(map, &d_insize, offset, sizeof(d_insize)) != sizeof(d_insize)) {
+        cli_errmsg("scanzws: Error reading SWF file\n");
+        close(fd);
+        if (cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EREAD;
+    }
+    offset += sizeof(d_insize);
+
+    /* check if declared input size matches actual compressed stream size */
+    /* map->len = header (8 bytes) + d_insize (4 bytes) + flags (5 bytes) + compressed stream */
+    if (d_insize != (map->len - 17)) {
+        cli_warnmsg("SWF: declared input length != compressed stream size, %u != %llu\n",
+                    d_insize, (long long unsigned)(map->len - 17));
+    } else {
+        cli_dbgmsg("SWF: declared input length == compressed stream size, %u == %llu\n",
+                    d_insize, (long long unsigned)(map->len - 17));
+    }
+
+    /* first buffer required for initializing LZMA */
+    ret = fmap_readn(map, inbuff, offset, FILEBUFF);
+    if (ret < 0) {
+        cli_errmsg("scanzws: Error reading SWF file\n");
+        close(fd);
+        if (cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EUNPACK;
+    }
+    if (!ret) {
+        /* likely truncated: release the temp file like every other error path */
+        close(fd);
+        if (cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EFORMAT;
+    }
+    offset += ret;
+
+    memset(&lz, 0, sizeof(lz));
+    lz.next_in = inbuff;
+    lz.next_out = outbuff;
+    lz.avail_in = ret;
+    lz.avail_out = FILEBUFF;
+
+    lret = cli_LzmaInit(&lz, hdr->filesize);
+    if (lret != LZMA_RESULT_OK) {
+        cli_errmsg("scanzws: LzmaInit() failed\n");
+        close(fd);
+        if (cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EUNPACK;
+    }
+
+    while (lret == LZMA_RESULT_OK) {
+        /* refill the input buffer once the decoder has consumed it */
+        if (lz.avail_in == 0) {
+            lz.next_in = inbuff;
+
+            ret = fmap_readn(map, inbuff, offset, FILEBUFF);
+            if (ret < 0) {
+                cli_errmsg("scanzws: Error reading SWF file\n");
+                cli_LzmaShutdown(&lz);
+                close(fd);
+                if (cli_unlink(tmpname)) {
+                    free(tmpname);
+                    return CL_EUNLINK;
+                }
+                free(tmpname);
+                return CL_EUNPACK;
+            }
+            if (!ret)
+                break;
+            lz.avail_in = ret;
+            offset += ret;
+        }
+        lret = cli_LzmaDecode(&lz);
+        count = FILEBUFF - lz.avail_out;
+        if (count) {
+            /* stop decompressing (but still scan what we have) once limits are hit */
+            if (cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
+                break;
+            if (cli_writen(fd, outbuff, count) != count) {
+                cli_errmsg("scanzws: Can't write to file %s\n", tmpname);
+                cli_LzmaShutdown(&lz);
+                close(fd);
+                if (cli_unlink(tmpname)) {
+                    free(tmpname);
+                    return CL_EUNLINK;
+                }
+                free(tmpname);
+                return CL_EWRITE;
+            }
+            outsize += count;
+        }
+        lz.next_out = outbuff;
+        lz.avail_out = FILEBUFF;
+    }
+
+    cli_LzmaShutdown(&lz);
+
+    if (lret != LZMA_STREAM_END && lret != LZMA_RESULT_OK) {
+        /* outsize starts at 8, therefore, if its still 8, nothing was decompressed */
+        if (outsize == 8) {
+            cli_infomsg(ctx, "scanzws: Error decompressing SWF file. No data decompressed.\n");
+            close(fd);
+            if (cli_unlink(tmpname)) {
+                free(tmpname);
+                return CL_EUNLINK;
+            }
+            free(tmpname);
+            return CL_EUNPACK;
+        }
+        cli_infomsg(ctx, "scanzws: Error decompressing SWF file. Scanning what was decompressed.\n");
+    }
+    /* outsize is a size_t: print it with an explicit cast, as the messages below do */
+    cli_dbgmsg("SWF: Decompressed[LZMA] to %s, size %llu\n", tmpname, (long long unsigned)outsize);
+
+    /* check if declared output size matches actual output size */
+    if (hdr->filesize != outsize) {
+        cli_warnmsg("SWF: declared output length != inflated stream size, %u != %llu\n",
+                    hdr->filesize, (long long unsigned)outsize);
+    } else {
+        cli_dbgmsg("SWF: declared output length == inflated stream size, %u == %llu\n",
+                   hdr->filesize, (long long unsigned)outsize);
+    }
+
+    ret = cli_magic_scandesc(fd, ctx);
+
+    close(fd);
+    if (!(ctx->engine->keeptmp)) {
+        if (cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+    }
+    free(tmpname);
+    return ret;
+}
+
+static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
+{
+        z_stream stream;
+        char inbuff[FILEBUFF], outbuff[FILEBUFF];
+        fmap_t *map = *ctx->fmap;
+        int offset = 8, ret, zret, outsize = 8, count, zend;
+        char *tmpname;
+        int fd;
+
+    if((ret = cli_gentempfd(ctx->engine->tmpdir, &tmpname, &fd)) != CL_SUCCESS) {
+        cli_errmsg("scancws: Can't generate temporary file\n");
+        return ret;
+    }
+
+    hdr->signature[0] = 'F';
+    if(cli_writen(fd, hdr, sizeof(struct swf_file_hdr)) != sizeof(struct swf_file_hdr)) {
+        cli_errmsg("scancws: Can't write to file %s\n", tmpname);
+        close(fd);
+        if(cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EWRITE;
    }

    stream.avail_in = 0;
@ -155,56 +326,56 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)

    zret = inflateInit(&stream);
    if(zret != Z_OK) {
-	cli_errmsg("scancws: inflateInit() failed\n");
+        cli_errmsg("scancws: inflateInit() failed\n");
        close(fd);
-	if(cli_unlink(tmpname)) {
-	    free(tmpname);
-	    return CL_EUNLINK;
-	}
-	free(tmpname);
-	return CL_EUNPACK;
+        if(cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
+        free(tmpname);
+        return CL_EUNPACK;
    }

    do {
-	if(stream.avail_in == 0) {
-	    stream.next_in = (Bytef *)inbuff;
-	    ret = fmap_readn(map, inbuff, offset, FILEBUFF);
-	    if(ret < 0) {
-		cli_errmsg("scancws: Error reading SWF file\n");
-		close(fd);
-		if(cli_unlink(tmpname)) {
-		    free(tmpname);
-            inflateEnd(&stream);
-		    return CL_EUNLINK;
-		}
-		free(tmpname);
-        inflateEnd(&stream);
-		return CL_EUNPACK;
-	    }
-	    if(!ret)
-		break;
-	    stream.avail_in = ret;
-	    offset += ret;
-	}
-	zret = inflate(&stream, Z_SYNC_FLUSH);
-	count = FILEBUFF - stream.avail_out;
-	if(count) {
-	    if(cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
-		break;
-	    if(cli_writen(fd, outbuff, count) != count) {
-		cli_errmsg("scancws: Can't write to file %s\n", tmpname);
-		close(fd);
-		if(cli_unlink(tmpname)) {
-		    free(tmpname);
-		    return CL_EUNLINK;
-		}
-		free(tmpname);
-		return CL_EWRITE;
-	    }
-	    outsize += count;
-	}
-	stream.next_out = (Bytef *)outbuff;
-	stream.avail_out = FILEBUFF;
+        if(stream.avail_in == 0) {
+            stream.next_in = (Bytef *)inbuff;
+            ret = fmap_readn(map, inbuff, offset, FILEBUFF);
+            if(ret < 0) {
+                cli_errmsg("scancws: Error reading SWF file\n");
+                close(fd);
+                inflateEnd(&stream);
+                if(cli_unlink(tmpname)) {
+                    free(tmpname);
+                    return CL_EUNLINK;
+                }
+                free(tmpname);
+                return CL_EUNPACK;
+            }
+            if(!ret)
+                break;
+            stream.avail_in = ret;
+            offset += ret;
+        }
+        zret = inflate(&stream, Z_SYNC_FLUSH);
+        count = FILEBUFF - stream.avail_out;
+        if(count) {
+            if(cli_checklimits("SWF", ctx, outsize + count, 0, 0) != CL_SUCCESS)
+                break;
+            if(cli_writen(fd, outbuff, count) != count) {
+                cli_errmsg("scancws: Can't write to file %s\n", tmpname);
+                inflateEnd(&stream);
+                close(fd);
+                if(cli_unlink(tmpname)) {
+                    free(tmpname);
+                    return CL_EUNLINK;
+                }
+                free(tmpname);
+                return CL_EWRITE;
+            }
+            outsize += count;
+        }
+        stream.next_out = (Bytef *)outbuff;
+        stream.avail_out = FILEBUFF;
    } while(zret == Z_OK);

    zend = inflateEnd(&stream);
@ -226,16 +397,25 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)
        }
        cli_infomsg(ctx, "scancws: Error decompressing SWF file. Scanning what was decompressed.\n");
    }
-    cli_dbgmsg("SWF: Decompressed to %s, size %d\n", tmpname, outsize);
+    cli_dbgmsg("SWF: Decompressed[zlib] to %s, size %d\n", tmpname, outsize);
+
+    /* check if declared output size matches actual output size */
+    if (hdr->filesize != outsize) {
+        cli_warnmsg("SWF: declared output length != inflated stream size, %u != %llu\n",
+                    hdr->filesize, (long long unsigned)outsize);
+    } else {
+        cli_dbgmsg("SWF: declared output length == inflated stream size, %u == %llu\n",
+                   hdr->filesize, (long long unsigned)outsize);
+    }

    ret = cli_magic_scandesc(fd, ctx);

    close(fd);
    if(!ctx->engine->keeptmp) {
-	if(cli_unlink(tmpname)) {
-	    free(tmpname);
-	    return CL_EUNLINK;
-	}
+        if(cli_unlink(tmpname)) {
+            free(tmpname);
+            return CL_EUNLINK;
+        }
    }
    free(tmpname);
    return ret;
@ -243,11 +423,11 @@ static int scancws(cli_ctx *ctx, struct swf_file_hdr *hdr)

 static const char *tagname(tag_id id)
 {
-	unsigned int i;
+        unsigned int i;

    for(i = 0; tag_names[i].name; i++)
-	if(tag_names[i].id == id)
-	    return tag_names[i].name;
+        if(tag_names[i].id == id)
+            return tag_names[i].name;
    return NULL;
 }

@ -265,19 +445,22 @@ int cli_scanswf(cli_ctx *ctx)
    cli_dbgmsg("in cli_scanswf()\n");

    if(fmap_readn(map, &file_hdr, offset, sizeof(file_hdr)) != sizeof(file_hdr)) {
-	cli_dbgmsg("SWF: Can't read file header\n");
-	return CL_CLEAN;
+        cli_dbgmsg("SWF: Can't read file header\n");
+        return CL_CLEAN;
    }
    offset += sizeof(file_hdr);

    if(!strncmp(file_hdr.signature, "CWS", 3)) {
-	cli_dbgmsg("SWF: Compressed file\n");
-	return scancws(ctx, &file_hdr);
+        cli_dbgmsg("SWF: zlib compressed file\n");
+        return scancws(ctx, &file_hdr);
+    } else if(!strncmp(file_hdr.signature, "ZWS", 3)) {
+        cli_dbgmsg("SWF: LZMA compressed file\n");
+        return scanzws(ctx, &file_hdr);
    } else if(!strncmp(file_hdr.signature, "FWS", 3)) {
-	cli_dbgmsg("SWF: Uncompressed file\n");
+        cli_dbgmsg("SWF: Uncompressed file\n");
    } else {
-	cli_dbgmsg("SWF: Not a SWF file\n");
-	return CL_CLEAN;
+        cli_dbgmsg("SWF: Not a SWF file\n");
+        return CL_CLEAN;
    }

    cli_dbgmsg("SWF: Version: %u\n", file_hdr.version);
@ -306,62 +489,62 @@ int cli_scanswf(cli_ctx *ctx)
    }

    while(offset < map->len) {
-	GETWORD(tag_hdr);
-	tag_type = tag_hdr >> 6;
-	if(tag_type == 0)
-	    break;
-	tag_len = tag_hdr & 0x3f;
-	if(tag_len == 0x3f)
-	    GETDWORD(tag_len);
+        GETWORD(tag_hdr);
+        tag_type = tag_hdr >> 6;
+        if(tag_type == 0)
+            break;
+        tag_len = tag_hdr & 0x3f;
+        if(tag_len == 0x3f)
+            GETDWORD(tag_len);

-	pt = tagname(tag_type);
-	cli_dbgmsg("SWF: %s\n", pt ? pt : "UNKNOWN TAG");
-	cli_dbgmsg("SWF: Tag length: %u\n", tag_len);
-	if (tag_len > map->len) {
-	    cli_dbgmsg("SWF: Invalid tag length.\n");
-	    return CL_EFORMAT;
-	}
-	if ((offset + tag_len) < offset) {
-	    cli_warnmsg("SWF: Tag length too large.\n");
-	    break;
-	}
-	if(!pt) {
-	    offset += tag_len;
-	    continue;
-	}
+        pt = tagname(tag_type);
+        cli_dbgmsg("SWF: %s\n", pt ? pt : "UNKNOWN TAG");
+        cli_dbgmsg("SWF: Tag length: %u\n", tag_len);
+        if (tag_len > map->len) {
+            cli_dbgmsg("SWF: Invalid tag length.\n");
+            return CL_EFORMAT;
+        }
+        if ((offset + tag_len) < offset) {
+            cli_warnmsg("SWF: Tag length too large.\n");
+            break;
+        }
+        if(!pt) {
+            offset += tag_len;
+            continue;
+        }

-	switch(tag_type) {
-	    case TAG_SCRIPTLIMITS: {
-		unsigned int recursion, timeout;
-		GETWORD(recursion);
-		GETWORD(timeout);
-		cli_dbgmsg("SWF: scriptLimits recursion %u timeout %u\n", recursion, timeout);
-		break;
-	    }
+        switch(tag_type) {
+            case TAG_SCRIPTLIMITS: {
+                unsigned int recursion, timeout;
+                GETWORD(recursion);
+                GETWORD(timeout);
+                cli_dbgmsg("SWF: scriptLimits recursion %u timeout %u\n", recursion, timeout);
+                break;
+            }

-	    case TAG_FILEATTRIBUTES:
-		GETDWORD(val);
-		cli_dbgmsg("SWF: File attributes:\n");
-		if(val & SWF_ATTR_USENETWORK)
-		    cli_dbgmsg("    * Use network\n");
-		if(val & SWF_ATTR_RELATIVEURLS)
-		    cli_dbgmsg("    * Relative URLs\n");
-		if(val & SWF_ATTR_SUPPRESSCROSSDOMAINCACHE)
-		    cli_dbgmsg("    * Suppress cross domain cache\n");
-		if(val & SWF_ATTR_ACTIONSCRIPT3)
-		    cli_dbgmsg("    * ActionScript 3.0\n");
-		if(val & SWF_ATTR_HASMETADATA)
-		    cli_dbgmsg("    * Has metadata\n");
-		if(val & SWF_ATTR_USEDIRECTBLIT)
-		    cli_dbgmsg("    * Use hardware acceleration\n");
-		if(val & SWF_ATTR_USEGPU)
-		    cli_dbgmsg("    * Use GPU\n");
-		break;
+            case TAG_FILEATTRIBUTES:
+                GETDWORD(val);
+                cli_dbgmsg("SWF: File attributes:\n");
+                if(val & SWF_ATTR_USENETWORK)
+                    cli_dbgmsg("    * Use network\n");
+                if(val & SWF_ATTR_RELATIVEURLS)
+                    cli_dbgmsg("    * Relative URLs\n");
+                if(val & SWF_ATTR_SUPPRESSCROSSDOMAINCACHE)
+                    cli_dbgmsg("    * Suppress cross domain cache\n");
+                if(val & SWF_ATTR_ACTIONSCRIPT3)
+                    cli_dbgmsg("    * ActionScript 3.0\n");
+                if(val & SWF_ATTR_HASMETADATA)
+                    cli_dbgmsg("    * Has metadata\n");
+                if(val & SWF_ATTR_USEDIRECTBLIT)
+                    cli_dbgmsg("    * Use hardware acceleration\n");
+                if(val & SWF_ATTR_USEGPU)
+                    cli_dbgmsg("    * Use GPU\n");
+                break;

-	    default:
-		offset += tag_len;
-		continue;
-	}
+            default:
+                offset += tag_len;
+                continue;
+        }
    }

    return CL_CLEAN;
--- a/libclamav/upack.c
+++ b/libclamav/upack.c
@ -302,6 +302,8 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
 			loc_esi += 4;
 			cli_dbgmsg("Upack: ecx counter: %08x\n", j);

+			if (((uint64_t)count+j) * 4 > UINT_MAX)
+				return -1;
 			if (!CLI_ISCONTAINED(dest, dsize, loc_esi, (j*4)) || !CLI_ISCONTAINED(dest, dsize, loc_edi, ((j+count)*4)))
 				return -1;
 			for (;j--; loc_edi+=4, loc_esi+=4)
@ -359,6 +361,8 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
 			loc_edi += 4;
 			loc_ebx = loc_edi;
 		
+			if (((uint64_t)count+6) * 4 > UINT_MAX)
+				return -1;
 			if (!CLI_ISCONTAINED(dest, dsize, loc_edi, ((6+count)*4)))
 				return -1;
 			cli_writeint32(loc_edi, 0xffffffff);
@ -432,6 +436,13 @@ int unupack(int upack, char *dest, uint32_t dsize, char *buff, uint32_t vma, uin
 	section.rsz = end_edi-loc_edi;
 	section.vsz = end_edi-loc_edi;

+	/* bb#11282 - prevent dest+va/dest from passing an invalid dereference to cli_rebuildpe */
+	/* check should trigger on broken PE files where the section exists outside of the file */
+	if ((!upack && ((va + section.rsz) > dsize)) || (upack && (section.rsz > dsize))) {
+		cli_dbgmsg("Upack: Rebuilt section exceeds allocated buffer; breaks cli_rebuildpe() bb#11282\n");
+		return 0;
+	}
+
 	if (!cli_rebuildpe(dest + (upack?0:va), &section, 1, base, original_ep, 0, 0, file)) {
 		cli_dbgmsg("Upack: Rebuilding failed\n");
 		return 0;
--- a/libclamav/upx.c
+++ b/libclamav/upx.c
@ -128,13 +128,13 @@ static int pefromupx (const char *src, uint32_t ssize, char *dst, uint32_t *dsiz
    return 0;

  while ((valign=magic[sectcnt++])) {
-    if ( ep - upx1 + valign <= ssize-5  &&    /* Wondering how we got so far?! */
+    if (CLI_ISCONTAINED(src, ssize - 5, src + ep - upx1 + valign - 2, 2) &&
 	 src[ep - upx1 + valign - 2] == '\x8d' && /* lea edi, ...                  */
 	 src[ep - upx1 + valign - 1] == '\xbe' )  /* ... [esi + offset]          */
      break;
  }

-  if (!valign && ep - upx1 + 0x80 < ssize-8) {
+  if (!valign && CLI_ISCONTAINED(src, ssize - 8, src + ep - upx1 + 0x80, 8)) {
    const char *pt = &src[ep - upx1 + 0x80];
    cli_dbgmsg("UPX: bad magic - scanning for imports\n");
    
--- a/libclamav/xar.c
+++ b/libclamav/xar.c
@ -186,7 +186,8 @@ static int xar_get_toc_data_values(xmlTextReaderPtr reader, long *length, long *
                cli_dbgmsg("cli_scanxar: <archived-checksum>:\n");
                xar_get_checksum_values(reader, a_cksum, a_hash);
                
-            } else if (xmlStrEqual(name, (const xmlChar *)"extracted-checksum") &&
+            } else if ((xmlStrEqual(name, (const xmlChar *)"extracted-checksum") ||
+                        xmlStrEqual(name, (const xmlChar *)"unarchived-checksum")) &&
                       xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
                cli_dbgmsg("cli_scanxar: <extracted-checksum>:\n");
                xar_get_checksum_values(reader, e_cksum, e_hash);
@ -561,11 +562,7 @@ int cli_scanxar(cli_ctx *ctx)


        a_hash_ctx = xar_hash_init(a_hash, &a_sc, &a_mc);
-        if (a_hash_ctx == NULL)
-            goto exit_tmpfile;
        e_hash_ctx = xar_hash_init(e_hash, &e_sc, &e_mc);
-        if (e_hash_ctx == NULL)
-            goto exit_tmpfile;

        switch (encoding) {
        case CL_TYPE_GZ:
@ -606,7 +603,8 @@ int cli_scanxar(cli_ctx *ctx)

                    bytes = sizeof(buff) - strm.avail_out;

-                    xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
+                    if (e_hash_ctx != NULL)
+                        xar_hash_update(e_hash_ctx, buff, bytes, e_hash);
                   
                    if (cli_writen(fd, buff, bytes) < 0) {
                        cli_dbgmsg("cli_scanxar: cli_writen error file %s.\n", tmpname);
@ -627,7 +625,8 @@ int cli_scanxar(cli_ctx *ctx)
                    break;

                avail_in -= strm.avail_in;
-                xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
+                if (a_hash_ctx != NULL)
+                    xar_hash_update(a_hash_ctx, next_in, avail_in, a_hash);
            }

            inflateEnd(&strm);
@ -665,7 +664,8 @@ int cli_scanxar(cli_ctx *ctx)
                lz.next_in = blockp;
                lz.avail_in = CLI_LZMA_HDR_SIZE;

-                xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);
+                if (a_hash_ctx != NULL)
+                    xar_hash_update(a_hash_ctx, blockp, CLI_LZMA_HDR_SIZE, a_hash);

                lret = cli_LzmaInit(&lz, 0);
                if (lret != LZMA_RESULT_OK) {
@ -716,8 +716,10 @@ int cli_scanxar(cli_ctx *ctx)
                        cli_dbgmsg("cli_scanxar: cli_LzmaDecode() produces no output for "
                                   "avail_in %lu, avail_out %lu.\n", avail_in, avail_out);

-                    xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);                    
-                    xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);
+                    if (a_hash_ctx != NULL)
+                        xar_hash_update(a_hash_ctx, next_in, in_consumed, a_hash);                    
+                    if (e_hash_ctx != NULL)
+                        xar_hash_update(e_hash_ctx, buff, avail_out, e_hash);

                    /* Write a decompressed block. */
                    /* cli_dbgmsg("Writing %li bytes to LZMA decompress temp file, " */
@ -770,7 +772,8 @@ int cli_scanxar(cli_ctx *ctx)
                    goto exit_tmpfile;
                }
                
-                xar_hash_update(a_hash_ctx, blockp, length, a_hash);
+                if (a_hash_ctx != NULL)
+                    xar_hash_update(a_hash_ctx, blockp, length, a_hash);
                
                if (cli_writen(fd, blockp, write_len) < 0) {
                    cli_dbgmsg("cli_scanxar: cli_writen error %li bytes @ %li.\n", length, at);
@ -782,25 +785,36 @@ int cli_scanxar(cli_ctx *ctx)
        }

        if (rc == CL_SUCCESS) {
-            xar_hash_final(a_hash_ctx, result, a_hash);
-            a_hash_ctx = NULL;
+            if (a_hash_ctx != NULL) {
+                xar_hash_final(a_hash_ctx, result, a_hash);
+                a_hash_ctx = NULL;
+            } else {
+                cli_dbgmsg("cli_scanxar: archived-checksum missing.\n");
+                cksum_fails++;
+            }
            if (a_cksum != NULL) {
                expected = cli_hex2str((char *)a_cksum);
                if (xar_hash_check(a_hash, result, expected) != 0) {
-                    cli_dbgmsg("cli_scanxar: archived-checksum missing or mismatch.\n");
+                    cli_dbgmsg("cli_scanxar: archived-checksum mismatch.\n");
                    cksum_fails++;
                } else {
                    cli_dbgmsg("cli_scanxar: archived-checksum matched.\n");                
                }
                free(expected);
            }
-            xar_hash_final(e_hash_ctx, result, e_hash);
-            e_hash_ctx = NULL;
+
+            if (e_hash_ctx != NULL) {
+                xar_hash_final(e_hash_ctx, result, e_hash);
+                e_hash_ctx = NULL;
+            } else {
+                cli_dbgmsg("cli_scanxar: extracted-checksum(unarchived-checksum) missing.\n");
+                cksum_fails++;
+            }
            if (e_cksum != NULL) {
                if (do_extract_cksum) {
                    expected = cli_hex2str((char *)e_cksum);
                    if (xar_hash_check(e_hash, result, expected) != 0) {
-                        cli_dbgmsg("cli_scanxar: extracted-checksum missing or mismatch.\n");
+                        cli_dbgmsg("cli_scanxar: extracted-checksum mismatch.\n");
                        cksum_fails++;
                    } else {
                        cli_dbgmsg("cli_scanxar: extracted-checksum matched.\n");                
--- a/libclamav/xz_iface.c
+++ b/libclamav/xz_iface.c
@ -75,7 +75,7 @@ int cli_XzDecode(struct CLI_XZ *XZ) {
        return XZ_STREAM_END;
    if (XZ->status == CODER_STATUS_NOT_FINISHED && XZ->avail_out == 0)
        return XZ_RESULT_OK;
-    if (res != SZ_OK)
+    if (((inbytes == 0) && (outbytes == 0)) || res != SZ_OK)
 	return XZ_RESULT_DATA_ERROR;
    return XZ_RESULT_OK;
 }
--- a/libclamav/yc.c
+++ b/libclamav/yc.c
@ -81,6 +81,7 @@ static int yc_poly_emulator(cli_ctx *ctx, char *base, unsigned int filesize, cha
  unsigned char al;
  unsigned char cl = ecx & 0xff;
  unsigned int j,i;
+  unsigned int max_jmp_loop = 100000000;

  for(i=0;i<ecx&&i<max_emu;i++) /* Byte looper - Decrypts every byte and write it back */
    {
@ -103,6 +104,9 @@ static int yc_poly_emulator(cli_ctx *ctx, char *base, unsigned int filesize, cha
            if (yc_bounds_check(ctx, base, filesize, decryptor_offset, j)) {
                return 2;
            }
+	      if (!max_jmp_loop)
+	          return 2;
+	      max_jmp_loop--;
 	      j = j + decryptor_offset[j];
 	      break;

--- a/m4/reorganization/version.m4
+++ b/m4/reorganization/version.m4
@ -3,7 +3,7 @@ VERSION="devel-`date +%Y%m%d`"
 dnl VERSION="1.0rc1"

 LC_CURRENT=7
-LC_REVISION=24
+LC_REVISION=26
 LC_AGE=1
 LIBCLAMAV_VERSION="$LC_CURRENT":"$LC_REVISION":"$LC_AGE"
 AC_SUBST([LIBCLAMAV_VERSION])
--- a/unit_tests/check_clamav.c
+++ b/unit_tests/check_clamav.c
@ -623,6 +623,7 @@ static Suite *test_cl_suite(void)
    Suite *s = suite_create("cl_api");
    TCase *tc_cl = tcase_create("cl_dup");
    TCase *tc_cl_scan = tcase_create("cl_scan");
+    char *user_timeout = NULL;
    int expect = expected_testfiles;
    suite_add_tcase (s, tc_cl);
    tcase_add_test(tc_cl, test_cl_free);
@ -661,6 +662,12 @@ static Suite *test_cl_suite(void)
    tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_handle_allscan, 0, expect);
    tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_mem, 0, expect);
    tcase_add_loop_test(tc_cl_scan, test_cl_scanmap_callback_mem_allscan, 0, expect);
+
+    user_timeout = getenv("T");
+    if (user_timeout) {
+        int timeout = atoi(user_timeout);
+        tcase_set_timeout(tc_cl_scan, timeout);
+    }
 #endif
    return s;
 }
--- a/win32/libclamav.vcxproj
+++ b/win32/libclamav.vcxproj
@ -357,6 +357,8 @@
    <ClCompile Include="..\libclamav\mpool.c" />
    <ClCompile Include="..\libclamav\msexpand.c" />
    <ClCompile Include="..\libclamav\mspack.c" />
+    <ClCompile Include="..\libclamav\msxml.c" />
+    <ClCompile Include="..\libclamav\msxml_parser.c" />
    <ClCompile Include="..\libclamav\nsis\bzlib.c">
      <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)\nsis_bzlib</ObjectFileName>
      <ObjectFileName Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)\nsis_bzlib</ObjectFileName>
--- a/win32/libclamav.vcxproj.filters
+++ b/win32/libclamav.vcxproj.filters
@ -201,6 +201,12 @@
    <ClCompile Include="..\libclamav\mspack.c">
      <Filter>Source Files</Filter>
    </ClCompile>
+    <ClCompile Include="..\libclamav\msxml.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\libclamav\msxml_parser.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
    <ClCompile Include="..\libclamav\ole2_extract.c">
      <Filter>Source Files</Filter>
    </ClCompile>
--- a/win32/res/common.rc
+++ b/win32/res/common.rc
@ -6,8 +6,8 @@
 #define REPO_VERSION VERSION
 #endif

-#define RES_VER_Q 0,98,0,0
-#define RES_VER_S "ClamAV 0.98"
+#define RES_VER_Q 0,98,7,0
+#define RES_VER_S "ClamAV 0.98.7"

 VS_VERSION_INFO VERSIONINFO
    FILEVERSION RES_VER_Q