clamav/libclamav/openioc.c

336 lines
10 KiB
C
Raw Normal View History

/*
2025-02-14 10:24:30 -05:00
* Copyright (C) 2014-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*
* Authors: Steven Morgan <smorgan@sourcefire.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#if HAVE_CONFIG_H
#include "clamav-config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <errno.h>
#include <string.h>
#include "mpool.h"
#include "readdb.h"
2014-07-01 19:38:01 -04:00
#include "clamav.h"
#include "others.h"
#include "openioc.h"
#include <libxml/xmlreader.h>
/* One hash string collected from an OpenIOC document; nodes form a singly linked list. */
struct openioc_hash {
    uint8_t *hash;             /* hash text; duplicated with xmlStrdup(), released with xmlFree() */
    struct openioc_hash *next; /* next node in the list, or NULL for the last node */
};
/**
 * Advance the reader by one node and return that node's local name.
 *
 * @param reader  libxml2 text reader to advance.
 * @return The local name of the node just read, or NULL when
 *         xmlTextReaderRead() does not return 1 or the node has no local name.
 *
 * Every named node is traced through cli_dbgmsg(); end tags are marked.
 */
static const xmlChar *openioc_read(xmlTextReaderPtr reader)
{
    const xmlChar *local_name;
    const char *suffix;

    if (xmlTextReaderRead(reader) != 1) {
        return NULL;
    }

    local_name = xmlTextReaderConstLocalName(reader);
    if (local_name == NULL) {
        return NULL;
    }

    suffix = (xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) ? " end tag" : "";
    cli_dbgmsg("openioc_parse: xmlTextReaderRead read %s%s\n", local_name, suffix);

    return local_name;
}
/**
 * Decide whether the current <Context> element describes a file hash.
 *
 * The element qualifies when its "document" attribute is "FileItem" and its
 * "search" attribute is one of FileItem/Md5sum, FileItem/Sha1sum or
 * FileItem/Sha256sum (compared case-sensitively).
 *
 * @param reader  Reader positioned on the element to inspect.
 * @return 1 if the element is a hash context, 0 otherwise.
 */
static int openioc_is_context_hash(xmlTextReaderPtr reader)
{
    static const char *const hash_searches[] = {
        "FileItem/Md5sum",
        "FileItem/Sha1sum",
        "FileItem/Sha256sum",
    };
    xmlChar *doc_attr    = xmlTextReaderGetAttribute(reader, (const xmlChar *)"document");
    xmlChar *search_attr = xmlTextReaderGetAttribute(reader, (const xmlChar *)"search");
    int is_hash          = 0;
    size_t i;

    if (doc_attr != NULL && search_attr != NULL &&
        xmlStrcmp(doc_attr, (const xmlChar *)"FileItem") == 0) {
        for (i = 0; i < sizeof(hash_searches) / sizeof(hash_searches[0]); i++) {
            if (xmlStrcmp(search_attr, (const xmlChar *)hash_searches[i]) == 0) {
                is_hash = 1;
                break;
            }
        }
    }

    /* Attribute values returned by xmlTextReaderGetAttribute() are owned by us. */
    if (doc_attr != NULL) {
        xmlFree(doc_attr);
    }
    if (search_attr != NULL) {
        xmlFree(search_attr);
    }

    return is_hash;
}
/**
 * Handle a <Content> element: if it carries a hash, push a copy of its text
 * onto the front of the *elems list.
 *
 * When context_hash is 0 the element is only accepted if its "type" attribute
 * is "sha1", "sha256" or "md5" (case-insensitive); otherwise it is skipped.
 * When context_hash is non-zero the "type" attribute is not consulted.
 *
 * @param reader        Reader positioned on the <Content> start element.
 * @param elems         In/out head of the collected hash list. A new node is
 *                      prepended on success; its hash is an xmlStrdup() copy.
 * @param context_hash  Non-zero if the enclosing context was already
 *                      identified as a file-hash context.
 * @return CL_SUCCESS when the element was stored or skipped, CL_EMEM when an
 *         allocation failed (the list is left unchanged in that case).
 */
static int openioc_parse_content(xmlTextReaderPtr reader, struct openioc_hash **elems, int context_hash)
{
    const xmlChar *xmlval;
    struct openioc_hash *elem;

    if (context_hash == 0) {
        int is_hash_type;
        xmlChar *type = xmlTextReaderGetAttribute(reader, (const xmlChar *)"type");

        if (type == NULL) {
            cli_dbgmsg("openioc_parse: xmlTextReaderGetAttribute no type attribute "
                       "for <Content> element\n");
            return CL_SUCCESS;
        }

        is_hash_type = xmlStrcasecmp(type, (const xmlChar *)"sha1") == 0 ||
                       xmlStrcasecmp(type, (const xmlChar *)"sha256") == 0 ||
                       xmlStrcasecmp(type, (const xmlChar *)"md5") == 0;
        xmlFree(type);

        if (!is_hash_type) {
            return CL_SUCCESS;
        }
    }

    /* The hash itself is the text node that follows the start tag. */
    if (xmlTextReaderRead(reader) != 1 || xmlTextReaderNodeType(reader) != XML_READER_TYPE_TEXT) {
        cli_dbgmsg("openioc_parse: No text for XML Content element.\n");
        return CL_SUCCESS;
    }

    xmlval = xmlTextReaderConstValue(reader);
    if (xmlval == NULL) {
        cli_dbgmsg("openioc_parse: xmlTextReaderConstValue() returns NULL for Content md5 value.\n");
        return CL_SUCCESS;
    }

    elem = calloc(1, sizeof(*elem));
    if (NULL == elem) {
        cli_dbgmsg("openioc_parse: calloc fails for openioc_hash.\n");
        return CL_EMEM;
    }

    /*
     * xmlStrdup() returns NULL when it cannot allocate. Queuing a node with a
     * NULL hash would make the consumer dereference NULL, so fail here instead.
     */
    elem->hash = xmlStrdup(xmlval);
    if (NULL == elem->hash) {
        cli_dbgmsg("openioc_parse: xmlStrdup fails for hash value.\n");
        free(elem);
        return CL_EMEM;
    }

    elem->next = *elems;
    *elems     = elem;

    return CL_SUCCESS;
}
/**
 * Consume the children of an <IndicatorItem> element, collecting hashes from
 * its <Content> children.
 *
 * A <Context> start tag updates whether subsequent <Content> elements are
 * treated as being in a file-hash context. Reading stops at the closing
 * </IndicatorItem> tag, when the reader runs out of named nodes, or on the
 * first error from openioc_parse_content().
 *
 * @param reader  Reader positioned inside an <IndicatorItem>.
 * @param elems   In/out head of the collected hash list.
 * @return CL_SUCCESS, or the error returned by openioc_parse_content().
 */
static int openioc_parse_indicatoritem(xmlTextReaderPtr reader, struct openioc_hash **elems)
{
    int status          = CL_SUCCESS;
    int in_hash_context = 0;
    const xmlChar *tag;

    while ((tag = openioc_read(reader)) != NULL) {
        const int node_type = xmlTextReaderNodeType(reader);

        if (node_type == XML_READER_TYPE_ELEMENT) {
            if (xmlStrEqual(tag, (const xmlChar *)"Context")) {
                in_hash_context = openioc_is_context_hash(reader);
            } else if (xmlStrEqual(tag, (const xmlChar *)"Content")) {
                status = openioc_parse_content(reader, elems, in_hash_context);
                if (status != CL_SUCCESS) {
                    break;
                }
            }
        } else if (node_type == XML_READER_TYPE_END_ELEMENT &&
                   xmlStrEqual(tag, (const xmlChar *)"IndicatorItem")) {
            break;
        }
    }

    return status;
}
/**
 * Consume the children of an <Indicator> element.
 *
 * Nested <Indicator> elements are handled by recursion and <IndicatorItem>
 * elements by openioc_parse_indicatoritem(). Reading stops at the closing
 * </Indicator> tag, when the reader runs out of named nodes, or on the first
 * error reported by a child parser.
 *
 * @param reader  Reader positioned inside an <Indicator>.
 * @param elems   In/out head of the collected hash list.
 * @return CL_SUCCESS, or the error returned by a child parser.
 */
static int openioc_parse_indicator(xmlTextReaderPtr reader, struct openioc_hash **elems)
{
    int status = CL_SUCCESS;

    for (;;) {
        int node_type;
        const xmlChar *tag = openioc_read(reader);

        if (tag == NULL) {
            break;
        }

        node_type = xmlTextReaderNodeType(reader);

        if (node_type == XML_READER_TYPE_ELEMENT) {
            if (xmlStrEqual(tag, (const xmlChar *)"Indicator")) {
                status = openioc_parse_indicator(reader, elems);
                if (status != CL_SUCCESS) {
                    cli_dbgmsg("openioc_parse: openioc_parse_indicator recursion error.\n");
                    break;
                }
            } else if (xmlStrEqual(tag, (const xmlChar *)"IndicatorItem")) {
                status = openioc_parse_indicatoritem(reader, elems);
                if (status != CL_SUCCESS) {
                    break;
                }
            }
        } else if (node_type == XML_READER_TYPE_END_ELEMENT &&
                   xmlStrEqual(tag, (const xmlChar *)"Indicator")) {
            break;
        }
    }

    return status;
}
int openioc_parse(const char *fname, int fd, struct cl_engine *engine, unsigned int options)
{
int rc;
xmlTextReaderPtr reader = NULL;
const xmlChar *name;
struct openioc_hash *elems = NULL, *elem = NULL;
const char *iocp = NULL;
uint16_t ioclen;
char *virusname;
int hash_count = 0;
if (fname == NULL)
return CL_ENULLARG;
if (fd < 0)
return CL_EARG;
cli_dbgmsg("openioc_parse: XML parsing file %s\n", fname);
reader = xmlReaderForFd(fd, NULL, NULL, CLAMAV_MIN_XMLREADER_FLAGS);
if (reader == NULL) {
cli_dbgmsg("openioc_parse: xmlReaderForFd error\n");
return CL_EOPEN;
}
rc = xmlTextReaderRead(reader);
while (rc == 1) {
name = xmlTextReaderConstLocalName(reader);
cli_dbgmsg("openioc_parse: xmlTextReaderRead read %s\n", name);
if (xmlStrEqual(name, (const xmlChar *)"Indicator") &&
xmlTextReaderNodeType(reader) == XML_READER_TYPE_ELEMENT) {
rc = openioc_parse_indicator(reader, &elems);
if (rc != CL_SUCCESS) {
xmlTextReaderClose(reader);
xmlFreeTextReader(reader);
return rc;
}
}
if (xmlStrEqual(name, (const xmlChar *)"ioc") &&
xmlTextReaderNodeType(reader) == XML_READER_TYPE_END_ELEMENT) {
break;
}
rc = xmlTextReaderRead(reader);
}
iocp = strrchr(fname, *PATHSEP);
if (NULL == iocp)
iocp = fname;
else
iocp++;
ioclen = (uint16_t)strlen(fname);
if (elems != NULL) {
if (NULL == engine->hm_hdb) {
engine->hm_hdb = MPOOL_CALLOC(engine->mempool, 1, sizeof(struct cli_matcher));
if (NULL == engine->hm_hdb) {
xmlTextReaderClose(reader);
xmlFreeTextReader(reader);
return CL_EMEM;
}
#ifdef USE_MPOOL
engine->hm_hdb->mempool = engine->mempool;
#endif
}
}
while (elems != NULL) {
const char *sp;
char *hash, *vp;
int i, hashlen;
elem = elems;
elems = elems->next;
hash = (char *)(elem->hash);
while (isspace(*hash))
hash++;
hashlen = strlen(hash);
if (hashlen == 0) {
xmlFree(elem->hash);
free(elem);
continue;
}
vp = hash + hashlen - 1;
while (isspace(*vp) && vp > hash) {
*vp-- = '\0';
hashlen--;
}
virusname = calloc(1, ioclen + hashlen + 2);
2014-03-24 16:56:59 -04:00
if (NULL == virusname) {
cli_dbgmsg("openioc_parse: calloc for virname memory failed.\n");
xmlTextReaderClose(reader);
xmlFreeTextReader(reader);
return CL_EMEM;
}
sp = fname;
vp = virusname;
for (i = 0; i < ioclen; i++, sp++, vp++) {
switch (*sp) {
case '\\':
case '/':
case '?':
case '%':
case '*':
case ':':
case '|':
case '"':
case '<':
case '>':
*vp = '_';
break;
default:
if (isspace(*sp))
*vp = '_';
else
*vp = *sp;
}
}
*vp++ = '.';
sp = hash;
for (i = 0; i < hashlen; i++, sp++) {
if (isxdigit(*sp)) {
*vp++ = *sp;
}
}
vp = virusname;
virusname = CLI_MPOOL_VIRNAME(engine->mempool, virusname, options & CL_DB_OFFICIAL);
if (!(virusname)) {
cli_dbgmsg("openioc_parse: MPOOL_MALLOC for virname memory failed.\n");
xmlTextReaderClose(reader);
xmlFreeTextReader(reader);
free(vp);
return CL_EMEM;
}
free(vp);
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. 
Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
rc = hm_addhash_str(engine, HASH_PURPOSE_WHOLE_FILE_DETECT, hash, 0, virusname);
if (rc != CL_SUCCESS)
cli_dbgmsg("openioc_parse: hm_addhash_str failed with %i hash len %i for %s.\n",
rc, hashlen, virusname);
else
hash_count++;
xmlFree(elem->hash);
free(elem);
}
if (hash_count == 0)
cli_warnmsg("openioc_parse: No hash signatures extracted from %s.\n", fname);
else
cli_dbgmsg("openioc_parse: %i hash signature%s extracted from %s.\n",
hash_count, hash_count == 1 ? "" : "s", fname);
xmlTextReaderClose(reader);
xmlFreeTextReader(reader);
return CL_SUCCESS;
}