ooxml_hwp: add support for filetyping and preclassification

This commit is contained in:
Kevin Lin 2015-12-17 16:16:55 -05:00
parent 523e4264e0
commit c6f7be5536
5 changed files with 138 additions and 46 deletions

View file

@ -125,6 +125,7 @@ static const struct ftmap_s {
{ "CL_TYPE_HWP3", CL_TYPE_HWP3 }, { "CL_TYPE_HWP3", CL_TYPE_HWP3 },
{ "CL_TYPE_XML_HWP", CL_TYPE_XML_HWP }, { "CL_TYPE_XML_HWP", CL_TYPE_XML_HWP },
{ "CL_TYPE_HWPOLE2", CL_TYPE_HWPOLE2 }, { "CL_TYPE_HWPOLE2", CL_TYPE_HWPOLE2 },
{ "CL_TYPE_OOXML_HWP", CL_TYPE_OOXML_HWP },
{ NULL, CL_TYPE_IGNORED } { NULL, CL_TYPE_IGNORED }
}; };
@ -223,6 +224,9 @@ int is_tar(const unsigned char *buf, unsigned int nbytes);
#define OOXML_CONTENTTYPES "[ContentTypes].xml" #define OOXML_CONTENTTYPES "[ContentTypes].xml"
#define OOXML_CONTENTTYPES_LEN (sizeof(OOXML_CONTENTTYPES)-1) #define OOXML_CONTENTTYPES_LEN (sizeof(OOXML_CONTENTTYPES)-1)
/*
 * Entry name that cli_filetype2() compares against the zip header name
 * pointer (znamep); a match makes it return CL_TYPE_OOXML_HWP.
 * OOXML_HWP_CONTENTS_LEN is the string length without the terminating NUL.
 */
#define OOXML_HWP_CONTENTS "Contents/content.hpf"
#define OOXML_HWP_CONTENTS_LEN (sizeof(OOXML_HWP_CONTENTS)-1)
cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t basetype) cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t basetype)
{ {
unsigned char buffer[MAGIC_BUFFER_SIZE]; unsigned char buffer[MAGIC_BUFFER_SIZE];
@ -328,20 +332,29 @@ cli_file_t cli_filetype2(fmap_t *map, const struct cl_engine *engine, cli_file_t
if (zlen >= OOXML_DOCPROPS_DIR_LEN) { if (zlen >= OOXML_DOCPROPS_DIR_LEN) {
if (0 == memcmp(znamep, OOXML_DOCPROPS_DIR, OOXML_DOCPROPS_DIR_LEN)) { if (0 == memcmp(znamep, OOXML_DOCPROPS_DIR, OOXML_DOCPROPS_DIR_LEN)) {
likely_ooxml = 1; likely_ooxml = 1;
} else {
if (zlen >= OOXML_CONTENTTYPES_LEN) {
if (0 == memcmp(znamep, OOXML_CONTENTTYPES, OOXML_CONTENTTYPES_LEN)) {
likely_ooxml = 1;
}
} else {
znamep = NULL;
break;
}
} }
} else { } else {
znamep = NULL; znamep = NULL;
break; break;
} }
if (zlen >= OOXML_CONTENTTYPES_LEN) {
if (0 == memcmp(znamep, OOXML_CONTENTTYPES, OOXML_CONTENTTYPES_LEN)) {
likely_ooxml = 1;
}
} else {
znamep = NULL;
break;
}
if (zlen >= OOXML_HWP_CONTENTS_LEN) {
if (0 == memcmp(znamep, OOXML_HWP_CONTENTS, OOXML_HWP_CONTENTS_LEN)) {
cli_dbgmsg("Recognized OOXML HWP file\n");
return CL_TYPE_OOXML_HWP;
}
} else {
znamep = NULL;
break;
}
if (++lhc > 2) { if (++lhc > 2) {
/* only check first three zip headers unless likely ooxml */ /* only check first three zip headers unless likely ooxml */

View file

@ -86,6 +86,7 @@ typedef enum {
CL_TYPE_OOXML_XL, CL_TYPE_OOXML_XL,
CL_TYPE_INTERNAL, CL_TYPE_INTERNAL,
CL_TYPE_HWP3, CL_TYPE_HWP3,
CL_TYPE_OOXML_HWP,
/* Section for partition types */ /* Section for partition types */
CL_TYPE_PART_ANY, /* unknown partition type */ CL_TYPE_PART_ANY, /* unknown partition type */

View file

@ -25,6 +25,7 @@
#include "clamav.h" #include "clamav.h"
#include "cltypes.h" #include "cltypes.h"
#include "filetypes.h"
#include "others.h" #include "others.h"
#include "unzip.h" #include "unzip.h"
#if HAVE_JSON #if HAVE_JSON
@ -47,6 +48,7 @@
#if HAVE_LIBXML2 && HAVE_JSON #if HAVE_LIBXML2 && HAVE_JSON
/*** OOXML MSDOC ***/
static const struct key_entry ooxml_keys[] = { static const struct key_entry ooxml_keys[] = {
{ "coreproperties", "CoreProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB }, { "coreproperties", "CoreProperties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
{ "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE }, { "title", "Title", MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
@ -341,6 +343,48 @@ static int ooxml_content_cb(int fd, cli_ctx *ctx)
xmlFreeTextReader(reader); xmlFreeTextReader(reader);
return ret; return ret;
} }
/*** OOXML HWP ***/

/*
 * Key table handed to cli_msxml_parse_document() by ooxml_hwp_cb().
 * Each entry holds two strings and a set of MSXML_JSON_* flags; presumably
 * the first string is the XML element name to match and the second is the
 * name used in the JSON output -- confirm against struct key_entry.
 */
static const struct key_entry ooxml_hwp_keys[] = {
    { "hcfversion", "HCFVersion", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },

    { "package",    "Properties", MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
    { "metadata",   "Metadata",   MSXML_JSON_ROOT | MSXML_JSON_ATTRIB },
    { "title",      "Title",      MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "language",   "Language",   MSXML_JSON_WRKPTR | MSXML_JSON_VALUE },
    { "meta",       "MetaFields", MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB | MSXML_JSON_COUNT | MSXML_JSON_MULTI },
    { "item",       "Contents",   MSXML_JSON_WRKPTR | MSXML_JSON_ATTRIB | MSXML_JSON_COUNT | MSXML_JSON_MULTI }
};

/* Number of entries in ooxml_hwp_keys. */
static size_t num_ooxml_hwp_keys = sizeof(ooxml_hwp_keys) / sizeof(ooxml_hwp_keys[0]);
/*
 * Callback that parses one extracted OOXML HWP XML document.
 *
 * cli_process_ooxml() passes this function to unzip_single_internal() for
 * both "version.xml" and "Contents/content.hpf".
 *
 * fd  - descriptor of the extracted document; handed to ooxml_updatelimits()
 *       and to xmlReaderForFd().
 * ctx - scan context, forwarded to the limit check and to the parser.
 *
 * Returns the ooxml_updatelimits() status when it is not CL_CLEAN,
 * CL_SUCCESS when the XML reader cannot be created, and otherwise the
 * status returned by cli_msxml_parse_document().
 */
static int ooxml_hwp_cb(int fd, cli_ctx *ctx)
{
    xmlTextReaderPtr xml;
    int status;

    cli_dbgmsg("in ooxml_hwp_cb\n");

    /* perform engine limit checks in temporary tracking session */
    status = ooxml_updatelimits(fd, ctx);
    if (status != CL_CLEAN) {
        return status;
    }

    xml = xmlReaderForFd(fd, "ooxml_hwp.xml", NULL, CLAMAV_MIN_XMLREADER_FLAGS);
    if (!xml) {
        cli_dbgmsg("ooxml_hwp_cb: xmlReaderForFd error\n");
        return CL_SUCCESS; /* internal error from libxml2 */
    }

    status = cli_msxml_parse_document(ctx, xml, ooxml_hwp_keys, num_ooxml_hwp_keys, 1, NULL);

    /* CL_SUCCESS, CL_ETIMEOUT and CL_BREAK are returned without a warning */
    if (!(status == CL_SUCCESS || status == CL_ETIMEOUT || status == CL_BREAK)) {
        cli_warnmsg("ooxml_hwp_cb: encountered issue in parsing properties document\n");
    }

    /* the reader is released on every path that created it */
    xmlTextReaderClose(xml);
    xmlFreeTextReader(xml);

    return status;
}
#endif /* HAVE_LIBXML2 && HAVE_JSON */ #endif /* HAVE_LIBXML2 && HAVE_JSON */
int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map) int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map)
@ -376,41 +420,71 @@ int cli_ooxml_filetype(cli_ctx *ctx, fmap_t *map)
return CL_SUCCESS; return CL_SUCCESS;
} }
int cli_process_ooxml(cli_ctx *ctx) int cli_process_ooxml(cli_ctx *ctx, int type)
{ {
#if HAVE_LIBXML2 && HAVE_JSON #if HAVE_LIBXML2 && HAVE_JSON
uint32_t loff = 0; uint32_t loff = 0;
int tmp = CL_SUCCESS; int ret = CL_SUCCESS;
cli_dbgmsg("in cli_process_ooxml\n"); cli_dbgmsg("in cli_process_ooxml\n");
if (!ctx) { if (!ctx) {
return CL_ENULLARG; return CL_ENULLARG;
} }
/* find "[Content Types].xml" */ if (type == CL_TYPE_OOXML_HWP) {
tmp = unzip_search_single(ctx, "[Content_Types].xml", 18, &loff); /* two files: version.xml and Contents/content.hpf */
if (tmp == CL_ETIMEOUT) { ret = unzip_search_single(ctx, "version.xml", 11, &loff);
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); if (ret == CL_ETIMEOUT) {
return CL_ETIMEOUT; cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
} return CL_ETIMEOUT;
else if (tmp != CL_VIRUS) { }
cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n"); else if (ret != CL_VIRUS) {
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES"); cli_dbgmsg("cli_process_ooxml: failed to find ""version.xml""!\n");
return CL_EFORMAT; cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_HWP_VERSION");
} return CL_EFORMAT;
cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff); }
ret = unzip_single_internal(ctx, loff, ooxml_hwp_cb);
tmp = unzip_single_internal(ctx, loff, ooxml_content_cb); if (ret == CL_SUCCESS) {
if (tmp == CL_ETIMEOUT) ret = unzip_search_single(ctx, "Contents/content.hpf", 20, &loff);
if (ret == CL_ETIMEOUT) {
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
return CL_ETIMEOUT;
}
else if (ret != CL_VIRUS) {
cli_dbgmsg("cli_process_ooxml: failed to find ""Contents/content.hpf""!\n");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_HWP_CONTENT");
return CL_EFORMAT;
}
ret = unzip_single_internal(ctx, loff, ooxml_hwp_cb);
}
} else {
/* find "[Content_Types].xml" */
ret = unzip_search_single(ctx, "[Content_Types].xml", 19, &loff);
if (ret == CL_ETIMEOUT) {
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
return CL_ETIMEOUT;
}
else if (ret != CL_VIRUS) {
cli_dbgmsg("cli_process_ooxml: failed to find ""[Content_Types].xml""!\n");
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_NO_CONTENT_TYPES");
return CL_EFORMAT;
}
cli_dbgmsg("cli_process_ooxml: found ""[Content_Types].xml"" @ %x\n", loff);
ret = unzip_single_internal(ctx, loff, ooxml_content_cb);
}
if (ret == CL_ETIMEOUT)
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_TIMEOUT");
else if (tmp == CL_EMEM) else if (ret == CL_EMEM)
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_OUTOFMEM");
else if (tmp == CL_EMAXSIZE) else if (ret == CL_EMAXSIZE)
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXSIZE");
else if (tmp == CL_EMAXFILES) else if (ret == CL_EMAXFILES)
cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES"); cli_json_parse_error(ctx->wrkproperty, "OOXML_ERROR_EMAXFILES");
return tmp; return ret;
#else #else
UNUSEDPARAM(ctx); UNUSEDPARAM(ctx);
cli_dbgmsg("in cli_process_ooxml\n"); cli_dbgmsg("in cli_process_ooxml\n");

View file

@ -26,7 +26,8 @@
#endif #endif
#include "others.h" #include "others.h"
int cli_ooxml_filetype(cli_ctx *, fmap_t *); int cli_ooxml_filetype(cli_ctx *, fmap_t *);
int cli_process_ooxml(cli_ctx *); int cli_process_ooxml(cli_ctx *, int);
#endif #endif

View file

@ -2686,7 +2686,8 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
type == CL_TYPE_XML_XL || type == CL_TYPE_XML_XL ||
type == CL_TYPE_HWP3 || type == CL_TYPE_HWP3 ||
type == CL_TYPE_XML_HWP || type == CL_TYPE_XML_HWP ||
type == CL_TYPE_HWPOLE2) { type == CL_TYPE_HWPOLE2 ||
type == CL_TYPE_OOXML_HWP) {
ctx->properties = json_object_new_object(); ctx->properties = json_object_new_object();
if (NULL == ctx->properties) { if (NULL == ctx->properties) {
cli_errmsg("magic_scandesc: no memory for json properties object\n"); cli_errmsg("magic_scandesc: no memory for json properties object\n");
@ -2890,22 +2891,24 @@ static int magic_scandesc(cli_ctx *ctx, cli_file_t type)
} }
break; break;
case CL_TYPE_OOXML_WORD: case CL_TYPE_OOXML_WORD:
case CL_TYPE_OOXML_PPT: case CL_TYPE_OOXML_PPT:
case CL_TYPE_OOXML_XL: case CL_TYPE_OOXML_XL:
case CL_TYPE_OOXML_HWP:
#if HAVE_JSON #if HAVE_JSON
if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && (ctx->wrkproperty != NULL)) { if ((ctx->options & CL_SCAN_FILE_PROPERTIES) && (ctx->wrkproperty != NULL)) {
ret = cli_process_ooxml(ctx); ret = cli_process_ooxml(ctx, type);
if (ret == CL_EMEM || ret == CL_ENULLARG) {
/* critical error */ if (ret == CL_EMEM || ret == CL_ENULLARG) {
break; /* critical error */
} break;
else if (ret != CL_SUCCESS) { }
/* allow for the CL_TYPE_ZIP scan to occur; cli_process_ooxml other possible returns: */ else if (ret != CL_SUCCESS) {
/* CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE, CL_EFORMAT, CL_BREAK, CL_ESTAT */ /* allow for the CL_TYPE_ZIP scan to occur; cli_process_ooxml other possible returns: */
ret = CL_SUCCESS; /* CL_ETIMEOUT, CL_EMAXSIZE, CL_EMAXFILES, CL_EPARSE, CL_EFORMAT, CL_BREAK, CL_ESTAT */
} ret = CL_SUCCESS;
} }
}
#endif #endif
case CL_TYPE_ZIP: case CL_TYPE_ZIP:
ctx->container_type = CL_TYPE_ZIP; ctx->container_type = CL_TYPE_ZIP;