clamav/libclamav/clamav.h

2662 lines
108 KiB
C
Raw Normal View History

2003-07-29 15:48:06 +00:00
/*
2025-02-14 10:24:30 -05:00
* Copyright (C) 2013-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
2013-03-08 17:24:50 -05:00
* Copyright (C) 2007-2013 Sourcefire, Inc.
*
* Authors: Tomasz Kojm
2003-07-29 15:48:06 +00:00
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
2003-07-29 15:48:06 +00:00
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
2003-07-29 15:48:06 +00:00
*/
#ifndef __CLAMAV_H
#define __CLAMAV_H
2014-07-02 13:45:56 -04:00
#ifdef _WIN32
#ifndef OWN_WINSOCK
#include <winsock2.h>
#endif
#endif
2014-07-01 19:38:01 -04:00
#include <openssl/ssl.h>
#include <openssl/err.h>
2012-07-17 14:33:07 -04:00
/* Certain OSs already use 64bit variables in their stat struct */
#if (!defined(__FreeBSD__) && !defined(__APPLE__))
#define STAT64_OK 1
#else
#define STAT64_OK 0
#endif
2012-07-17 14:33:07 -04:00
#if defined(HAVE_STAT64) && STAT64_OK
2012-07-17 16:34:12 -04:00
#include <unistd.h>
#define STATBUF struct stat64
#define CLAMSTAT stat64
#define LSTAT lstat64
#define FSTAT fstat64
#define safe_open(a, b) open(a, b | O_LARGEFILE)
#else
#define STATBUF struct stat
#define CLAMSTAT stat
#define LSTAT lstat
#define FSTAT fstat
/* Nothing is safe in windows, not even open, safe_open defined under /win32 */
#ifndef _WIN32
#define safe_open open
#endif
#endif
/* Apple does not define __pid_t */
#ifdef __APPLE__
typedef pid_t __pid_t;
#endif
#define UNUSEDPARAM(x) (void)(x)
2003-07-29 15:48:06 +00:00
#include <sys/types.h>
#include <sys/stat.h>
#include <stdbool.h>
#include "clamav-types.h"
#include "clamav-version.h"
2003-07-29 15:48:06 +00:00
#ifdef __cplusplus
extern "C" {
2003-07-29 15:48:06 +00:00
#endif
#define CL_COUNT_PRECISION 4096
/* return codes */
typedef enum cl_error_t {
/* libclamav specific */
CL_CLEAN = 0,
CL_SUCCESS = 0,
CL_VIRUS,
CL_ENULLARG,
CL_EARG,
CL_EMALFDB,
CL_ECVD,
CL_EVERIFY,
CL_EUNPACK,
/* I/O and memory errors */
CL_EOPEN,
CL_ECREAT,
CL_EUNLINK,
CL_ESTAT,
CL_EREAD,
CL_ESEEK,
CL_EWRITE,
CL_EDUP,
CL_EACCES,
CL_ETMPFILE,
CL_ETMPDIR,
CL_EMAP,
CL_EMEM,
CL_ETIMEOUT,
/* internal (not reported outside libclamav) */
CL_BREAK,
CL_EMAXREC,
CL_EMAXSIZE,
CL_EMAXFILES,
2009-07-08 12:45:06 +03:00
CL_EFORMAT,
2011-04-05 16:33:38 +02:00
CL_EPARSE,
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
CL_EBYTECODE, /** may be reported in testmode */
CL_EBYTECODE_TESTFAIL, /** may be reported in testmode */
CL_ELOCK,
CL_EBUSY,
CL_ESTATE,
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
CL_VERIFIED, /** The binary has been deemed trusted */
CL_ERROR, /** Unspecified / generic error */
2010-07-16 13:50:36 +02:00
/* no error codes below this line please */
CL_ELAST_ERROR
} cl_error_t;
/* db options */
// clang-format off
#define CL_DB_PHISHING 0x2
#define CL_DB_PHISHING_URLS 0x8
#define CL_DB_PUA 0x10
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_DB_CVDNOTMP 0x20 /** @deprecated obsolete */
#define CL_DB_OFFICIAL 0x40 /** internal */
#define CL_DB_PUA_MODE 0x80
#define CL_DB_PUA_INCLUDE 0x100
#define CL_DB_PUA_EXCLUDE 0x200
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_DB_COMPILED 0x400 /** internal */
#define CL_DB_DIRECTORY 0x800 /** internal */
#define CL_DB_OFFICIAL_ONLY 0x1000
#define CL_DB_BYTECODE 0x2000
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_DB_SIGNED 0x4000 /** internal */
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
#define CL_DB_BYTECODE_UNSIGNED 0x8000 /** Caution: You should never run bytecode signatures from untrusted sources.
* Doing so may result in arbitrary code execution. */
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_DB_UNSIGNED 0x10000 /** internal */
#define CL_DB_BYTECODE_STATS 0x20000
#define CL_DB_ENHANCED 0x40000
#define CL_DB_PCRE_STATS 0x80000
#define CL_DB_YARA_EXCLUDE 0x100000
#define CL_DB_YARA_ONLY 0x200000
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
#define CL_DB_FIPS_LIMITS 0x400000 /** Disable MD5 and SHA1 methods for trusting files and verifying certificates.
* Means: 1. CVD must be signed with external `.sign` file.
* 2. Disables support for `.fp` signatures. */
/* recommended db settings */
#define CL_DB_STDOPT (CL_DB_PHISHING | CL_DB_PHISHING_URLS | CL_DB_BYTECODE)
/*** scan options ***/
struct cl_scan_options {
uint32_t general;
uint32_t parse;
uint32_t heuristic;
uint32_t mail;
uint32_t dev;
};
/* general */
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_SCAN_GENERAL_ALLMATCHES 0x1 /** scan in all-match mode */
#define CL_SCAN_GENERAL_COLLECT_METADATA 0x2 /** collect metadata (--gen-json) */
#define CL_SCAN_GENERAL_HEURISTICS 0x4 /** option to enable heuristic alerts */
#define CL_SCAN_GENERAL_HEURISTIC_PRECEDENCE 0x8 /** allow heuristic match to take precedence */
#define CL_SCAN_GENERAL_UNPRIVILEGED 0x10 /** scanner will not have read access to files */
#define CL_SCAN_GENERAL_STORE_HTML_URIS 0x20 /** when collect-metadata enabled: store uris found in html <a and <form tags */
#define CL_SCAN_GENERAL_STORE_PDF_URIS 0x40 /** when collect-metadata enabled: store uris found in pdf /URI tags */
#define CL_SCAN_GENERAL_STORE_EXTRA_HASHES 0x80 /** when collect-metadata enabled: calculate and store each type of supported file hash */
/* parsing capabilities options */
#define CL_SCAN_PARSE_ARCHIVE 0x1
#define CL_SCAN_PARSE_ELF 0x2
#define CL_SCAN_PARSE_PDF 0x4
#define CL_SCAN_PARSE_SWF 0x8
#define CL_SCAN_PARSE_HWP3 0x10
#define CL_SCAN_PARSE_XMLDOCS 0x20
#define CL_SCAN_PARSE_MAIL 0x40
#define CL_SCAN_PARSE_OLE2 0x80
#define CL_SCAN_PARSE_HTML 0x100
#define CL_SCAN_PARSE_PE 0x200
#define CL_SCAN_PARSE_ONENOTE 0x400
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_SCAN_PARSE_IMAGE 0x800 /** option to enable/disable parsing images (graphics) */
#define CL_SCAN_PARSE_IMAGE_FUZZY_HASH 0x1000 /** option to enable/disable image fuzzy hash calculation. */
/* heuristic alerting options */
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_SCAN_HEURISTIC_BROKEN 0x2 /** alert on broken PE and broken ELF files */
#define CL_SCAN_HEURISTIC_EXCEEDS_MAX 0x4 /** alert when files exceed scan limits (filesize, max scansize, or max recursion depth) */
#define CL_SCAN_HEURISTIC_PHISHING_SSL_MISMATCH 0x8 /** alert on SSL mismatches */
#define CL_SCAN_HEURISTIC_PHISHING_CLOAK 0x10 /** alert on cloaked URLs in emails */
#define CL_SCAN_HEURISTIC_MACROS 0x20 /** alert on OLE2 files containing macros */
#define CL_SCAN_HEURISTIC_ENCRYPTED_ARCHIVE 0x40 /** alert if archive is encrypted (rar, zip, etc) */
#define CL_SCAN_HEURISTIC_ENCRYPTED_DOC 0x80 /** alert if a document is encrypted (pdf, docx, etc) */
#define CL_SCAN_HEURISTIC_PARTITION_INTXN 0x100 /** alert if partition table size doesn't make sense */
#define CL_SCAN_HEURISTIC_STRUCTURED 0x200 /** data loss prevention options, i.e. alert when detecting personal information */
#define CL_SCAN_HEURISTIC_STRUCTURED_SSN_NORMAL 0x400 /** alert when detecting social security numbers */
#define CL_SCAN_HEURISTIC_STRUCTURED_SSN_STRIPPED 0x800 /** alert when detecting stripped social security numbers */
#define CL_SCAN_HEURISTIC_STRUCTURED_CC 0x1000 /** alert when detecting credit card numbers */
#define CL_SCAN_HEURISTIC_BROKEN_MEDIA 0x2000 /** alert if a file does not match the identified file format, works with JPEG, TIFF, GIF, PNG */
/* mail scanning options */
#define CL_SCAN_MAIL_PARTIAL_MESSAGE 0x1
/* dev options */
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
#define CL_SCAN_DEV_COLLECT_SHA 0x1 /** @deprecated (functionality removed) */
#define CL_SCAN_DEV_COLLECT_PERFORMANCE_INFO 0x2 /** collect performance timings */
2004-09-13 16:44:01 +00:00
/* cl_countsigs options */
#define CL_COUNTSIGS_OFFICIAL 0x1
#define CL_COUNTSIGS_UNOFFICIAL 0x2
#define CL_COUNTSIGS_ALL (CL_COUNTSIGS_OFFICIAL | CL_COUNTSIGS_UNOFFICIAL)
2013-11-18 20:22:42 +00:00
/* For the new engine_options bit field in the engine */
#define ENGINE_OPTIONS_NONE 0x0
#define ENGINE_OPTIONS_DISABLE_CACHE 0x1
#define ENGINE_OPTIONS_FORCE_TO_DISK 0x2
2014-05-27 22:45:01 -04:00
#define ENGINE_OPTIONS_DISABLE_PE_STATS 0x4
#define ENGINE_OPTIONS_DISABLE_PE_CERTS 0x8
#define ENGINE_OPTIONS_PE_DUMPCERTS 0x10
libclamav: Add engine option to toggle temp directory recursion Temp directory recursion in ClamAV is when each layer of a scan gets its own temp directory in the parent layer's temp directory. In addition to temp directory recursion, ClamAV has been creating a new subdirectory for each file scan as a risk-adverse method to ensure no temporary file leaks fill up the disk. Creating a directory is relatively slow on Windows in particular if scanning a lot of very small files. This commit: 1. Separates the temp directory recursion feature from the leave-temps feature so that libclamav can leave temp files without making subdirectories for each file scanned. 2. Makes it so that when temp directory recursion is off, libclamav will just use the configure temp directory for all files. The new option to enable temp directory recursion is for libclamav-only at this time. It is off by default, and you can enable it like this: ```c cl_engine_set_num(engine, CL_ENGINE_TMPDIR_RECURSION, 1); ``` For the `clamscan` and `clamd` programs, temp directory recursion will be enabled when `--leave-temps` / `LeaveTemporaryFiles` is enabled. The difference is that when disabled, it will return to using the configured temp directory without making a subdirectory for each file scanned, so as to improve scan performance for small files, mostly on Windows. Under the hood, this commit also: 1. Cleans up how we keep track of tmpdirs for each layer. The goal here is to align how we keep track of layer-specific stuff using the scan_layer structure. 2. Cleans up how we record metadata JSON for embedded files. Note: Embedded files being different from Contained files, as they are extracted not with a parser, but by finding them with file type magic signatures. CLAM-1583
2025-06-09 20:42:31 -04:00
#define ENGINE_OPTIONS_TMPDIR_RECURSION 0x20
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
#define ENGINE_OPTIONS_FIPS_LIMITS 0x40
// clang-format on
2013-11-18 20:22:42 +00:00
struct cl_engine;
struct cl_settings;
2004-09-13 16:44:01 +00:00
/* ----------------------------------------------------------------------------
* Enable global libclamav features.
*/
/**
* @brief Enable debug messages
*/
extern void cl_debug(void);
/**
* @brief Set libclamav to always create section hashes for PE files.
*
* Section hashes are used in .mdb signature.
*/
extern void cl_always_gen_section_hash(void);
/* ----------------------------------------------------------------------------
* Scan engine functions.
*/
/**
* @brief This function initializes the openssl crypto system.
*
* Called by cl_init() and does not need to be cleaned up as de-init
* is handled automatically by openssl 1.0.2.h and 1.1.0
*
* @return Always returns 0
*/
int cl_initialize_crypto(void);
/**
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @brief Clean up ssl crypto inits.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Call to EVP_cleanup() has been removed since cleanup is now handled by
* auto-deinit as of openssl 1.0.2h and 1.1.0.
*/
void cl_cleanup_crypto(void);
#define CL_INIT_DEFAULT 0x0
/**
* @brief Initialize the ClamAV library.
*
* @param initoptions Unused.
2023-11-26 15:01:19 -08:00
* @return cl_error_t CL_SUCCESS if everything initialized correctly.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_init(unsigned int initoptions);
/**
* @brief Allocate a new scanning engine and initialize default settings.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* The engine must be freed with `cl_engine_free()`.
*
* @return struct cl_engine* Pointer to the scanning engine.
*/
extern struct cl_engine *cl_engine_new(void);
enum cl_engine_field {
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
CL_ENGINE_MAX_SCANSIZE, /** uint64_t */
CL_ENGINE_MAX_FILESIZE, /** uint64_t */
CL_ENGINE_MAX_RECURSION, /** uint32_t */
CL_ENGINE_MAX_FILES, /** uint32_t */
CL_ENGINE_MIN_CC_COUNT, /** uint32_t */
CL_ENGINE_MIN_SSN_COUNT, /** uint32_t */
CL_ENGINE_PUA_CATEGORIES, /** (char *) */
CL_ENGINE_DB_OPTIONS, /** uint32_t */
CL_ENGINE_DB_VERSION, /** uint32_t */
CL_ENGINE_DB_TIME, /** time_t */
CL_ENGINE_AC_ONLY, /** uint32_t */
CL_ENGINE_AC_MINDEPTH, /** uint32_t */
CL_ENGINE_AC_MAXDEPTH, /** uint32_t */
CL_ENGINE_TMPDIR, /** (char *) */
CL_ENGINE_KEEPTMP, /** uint32_t */
CL_ENGINE_BYTECODE_SECURITY, /** uint32_t */
CL_ENGINE_BYTECODE_TIMEOUT, /** uint32_t */
CL_ENGINE_BYTECODE_MODE, /** uint32_t */
CL_ENGINE_MAX_EMBEDDEDPE, /** uint64_t */
CL_ENGINE_MAX_HTMLNORMALIZE, /** uint64_t */
CL_ENGINE_MAX_HTMLNOTAGS, /** uint64_t */
CL_ENGINE_MAX_SCRIPTNORMALIZE, /** uint64_t */
CL_ENGINE_MAX_ZIPTYPERCG, /** uint64_t */
CL_ENGINE_FORCETODISK, /** uint32_t */
CL_ENGINE_CACHE_SIZE, /** uint32_t */
CL_ENGINE_DISABLE_CACHE, /** uint32_t */
CL_ENGINE_DISABLE_PE_STATS, /** uint32_t */
CL_ENGINE_STATS_TIMEOUT, /** uint32_t */
CL_ENGINE_MAX_PARTITIONS, /** uint32_t */
CL_ENGINE_MAX_ICONSPE, /** uint32_t */
CL_ENGINE_MAX_RECHWP3, /** uint32_t */
CL_ENGINE_MAX_SCANTIME, /** uint32_t */
CL_ENGINE_PCRE_MATCH_LIMIT, /** uint64_t */
CL_ENGINE_PCRE_RECMATCH_LIMIT, /** uint64_t */
CL_ENGINE_PCRE_MAX_FILESIZE, /** uint64_t */
CL_ENGINE_DISABLE_PE_CERTS, /** uint32_t */
CL_ENGINE_PE_DUMPCERTS, /** uint32_t */
CL_ENGINE_CVDCERTSDIR, /** (char *) */
CL_ENGINE_TMPDIR_RECURSION, /** uint32_t */
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
CL_ENGINE_FIPS_LIMITS, /** uint32_t */
2010-01-22 14:36:56 +02:00
};
enum bytecode_security {
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
CL_BYTECODE_TRUST_ALL = 0, /** @deprecated obsolete */
CL_BYTECODE_TRUST_SIGNED, /** default */
CL_BYTECODE_TRUST_NOTHING /** paranoid setting */
};
enum bytecode_mode {
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
CL_BYTECODE_MODE_AUTO = 0, /** JIT if possible, fallback to interpreter */
CL_BYTECODE_MODE_JIT, /** force JIT */
CL_BYTECODE_MODE_INTERPRETER, /** force interpreter */
CL_BYTECODE_MODE_TEST, /** both JIT and interpreter, compare results, all failures are fatal */
CL_BYTECODE_MODE_OFF /** for query only, not settable */
};
struct cli_section_hash {
unsigned char md5[16];
size_t len;
};
typedef struct cli_stats_sections {
size_t nsections;
struct cli_section_hash *sections;
} stats_section_t;
/**
* @brief Set a numerical engine option.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine An initialized scan engine.
* @param cl_engine_field A CL_ENGINE option.
* @param num The new engine option value.
* @return cl_error_t CL_SUCCESS if successfully set.
* @return cl_error_t CL_EARG if the field number was incorrect.
* @return cl_error_t CL_ENULLARG null arguments were provided.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_set_num(struct cl_engine *engine, enum cl_engine_field field, long long num);
/**
* @brief Get a numerical engine option.
*
* @param engine An initialized scan engine.
* @param cl_engine_field A CL_ENGINE option.
* @param err (optional) A cl_error_t status code.
* @return long long The numerical option value.
*/
2009-03-12 15:23:20 +00:00
extern long long cl_engine_get_num(const struct cl_engine *engine, enum cl_engine_field field, int *err);
/**
* @brief Set a string engine option.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* If the string option has already been set, the existing string will be free'd
* and the new string will replace it.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine An initialized scan engine.
* @param cl_engine_field A CL_ENGINE option.
* @param str The new engine option value.
* @return cl_error_t CL_SUCCESS if successfully set.
* @return cl_error_t CL_EARG if the field number was incorrect.
* @return cl_error_t CL_EMEM if a memory allocation error occurred.
* @return cl_error_t CL_ENULLARG null arguments were provided.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_set_str(struct cl_engine *engine, enum cl_engine_field field, const char *str);
/**
* @brief Get a string engine option.
*
* @param engine An initialized scan engine.
* @param cl_engine_field A CL_ENGINE option.
* @param err (optional) A cl_error_t status code.
* @return const char * The string option value.
*/
2009-03-12 15:23:20 +00:00
extern const char *cl_engine_get_str(const struct cl_engine *engine, enum cl_engine_field field, int *err);
/**
* @brief Copy the settings from an existing scan engine.
*
* The cl_settings pointer is allocated and must be freed with cl_engine_settings_free().
*
* @param engine An configured scan engine.
* @return struct cl_settings* The settings.
*/
extern struct cl_settings *cl_engine_settings_copy(const struct cl_engine *engine);
/**
* @brief Apply settings from a settings structure to a scan engine.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine A scan engine.
* @param settings The settings.
* @return cl_error_t CL_SUCCESS if successful.
* @return cl_error_t CL_EMEM if a memory allocation error occurred.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_settings_apply(struct cl_engine *engine, const struct cl_settings *settings);
/**
* @brief Free a settings struct pointer.
*
* @param settings The settings struct pointer.
* @return cl_error_t CL_SUCCESS if successful.
* @return cl_error_t CL_ENULLARG null arguments were provided.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_settings_free(struct cl_settings *settings);
/**
* @brief Prepare the scanning engine.
*
* Call this after all required databases have been loaded and settings have
* been applied.
*
* @param engine A scan engine.
* @return cl_error_t CL_SUCCESS if successful.
* @return cl_error_t CL_ENULLARG null arguments were provided.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_compile(struct cl_engine *engine);
/**
* @brief Add a reference count to the engine.
*
* Thread safety mechanism so that the engine is not free'd by another thread.
*
* The engine is initialized with refcount = 1, so this only needs to be called
* for additional scanning threads.
*
* @param engine A scan engine.
* @return cl_error_t CL_SUCCESS if successful.
* @return cl_error_t CL_ENULLARG null arguments were provided.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_addref(struct cl_engine *engine);
/**
* @brief Free an engine.
*
* Will lower the reference count on an engine. If the reference count hits
* zero, the engine will be freed.
*
* @param engine A scan engine.
* @return cl_error_t CL_SUCCESS if successful.
* @return cl_error_t CL_ENULLARG null arguments were provided.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_engine_free(struct cl_engine *engine);
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
/* ----------------------------------------------------------------------------
* ClamAV File Map Abstraction Public API.
*/
struct cl_fmap;
typedef struct cl_fmap cl_fmap_t;
/**
* @brief Read callback function type.
*
* A callback function pointer type for reading data from a cl_fmap_t that uses
* reads data from a handle interface.
*
* Read 'count' bytes starting at 'offset' into the buffer 'buf'
*
* Thread safety: It is guaranteed that only one callback is executing for a
* specific handle at any time, but there might be multiple callbacks executing
* for different handle at the same time.
*
* @param handle The handle passed to cl_fmap_open_handle, its meaning is up
* to the callback's implementation
* @param buf A buffer to read data into, must be at least offset + count
* bytes in size.
* @param count The number of bytes to read.
* @param offset The offset into buf to read the data to. If successful,
* the number of bytes actually read is returned. Upon reading
* end-of-file, zero is returned. Otherwise, a -1 is returned
* and the global variable errno is set to indicate the error.
*/
typedef off_t (*clcb_pread)(void *handle, void *buf, size_t count, off_t offset);
/**
* @brief Open a map given a handle.
*
* Open a map for scanning custom data accessed by a handle and pread (lseek +
* read)-like interface. For example a file descriptor or a WIN32 HANDLE.
* By default fmap will use aging to discard old data, unless you tell it not
* to.
*
* The handle will be passed to the callback each time.
*
* TIP: Check the size limits before calling this function so you don't map a
* file that is larger than the maximum size the engine is configured to scan.
*
* Note that the offset and len is for the start through the end of an _entire_
* file, which may be contained within another larger file.
* You can't give it parts of a file and expect detection to work.
*
* @param handle A handle that may be accessed using lseek + read.
* @param offset Initial offset to start scanning.
* @param len Length of the data from the start (not the offset).
* @param pread_cb A callback function to read data from the handle.
* @param use_aging Set to a non-zero value to enable aging.
* @return cl_fmap_t* A map representing the handle interface.
*/
extern cl_fmap_t *cl_fmap_open_handle(
void *handle,
size_t offset,
size_t len,
clcb_pread pread_cb,
int use_aging);
/**
* @brief Open a map given a buffer.
*
* Open a map for scanning custom data, where the data is already in memory,
* either in the form of a buffer, a memory mapped file, etc.
*
* Note that the memory [start, start+len) must be the _entire_ file,
* you can't give it parts of a file and expect detection to work.
*
* @param start Pointer to a buffer of data.
* @param len Length in bytes of the data.
* @return cl_fmap_t* A map representing the buffer.
*/
extern cl_fmap_t *cl_fmap_open_memory(const void *start, size_t len);
/**
* @brief Set the utf8 name of a file map. E.g. "invoice.exe"
*
* The name is used for debugging and logging purposes.
* In a future release, the file extension may help determine refine file type
* detection. E.g. ".js" may indicate that something previously identified as
* a text file is more specifically a JavaScript file.
* That is - at this time setting the name will have no impact on detection,
* but it may in the future.
*
* @param map The file map to modify.
* @param name The new name for the file map.
* @return cl_error_t CL_SUCCESS if the name was set successfully.
*/
extern cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);
/**
* @brief Get the utf8 name of a file map.
*
* The name is used for debugging and logging purposes.
*
* @param map The file map to query.
* @param[out] name_out Pointer to a variable to receive the name of the file map.
* @return const char* The name of the file map, or NULL if not set.
*/
extern cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);
/**
* @brief Set the utf8 path of a file map. E.g. "/tmp/invoice.exe"
*
* This is used to set the actual file path of the mapped map.
* The path may be used for debugging or logging purposes.
*
* You might want to set the path if you opened the map with `cl_fmap_open_handle()`.
*
* @param map The file map to modify.
* @param path The new path for the file map.
* @return cl_error_t CL_SUCCESS if the path was set successfully.
*/
extern cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);
/**
* @brief Get the utf8 path of a file map.
*
* The path is used for debugging and logging purposes.
*
* This may be NULL even if there is a file descriptor associated with the map.
* There is no guarantee that the path was set by whomever created the map.
* For example, if the map was created from a file descriptor that was provided
* to clamd by clamonacc or clamdscan using fd-passing, then the path may not be
* known.
*
* This of course will be NULL if the map was created from a memory buffer.
*
* If you need a temp file for each file extracted, you can use ClamAV's
* force-to-disk feature. Note: There is no option to force-to-memory.
* Many of the parser modules will create temporary files no matter what.
*
* If the fmap represents a temp file created during the scan, then unless you
* use CL_ENGINE_KEEPTMP, it will be deleted when the scan is done with this
* layer.
*
* @param map The file map to query.
* @param[out] path_out Pointer to a variable to receive the path of the file map.
* @param[out] offset_out (optional) Pointer to a variable to receive the offset of the current layer within the given file.
* @param[out] len_out (optional) Pointer to a variable to receive the length of the current layer within the given file.
* @return cl_error_t CL_SUCCESS if the path was successfully retrieved.
* CL_EACCES if the map does not have a file descriptor.
* CL_ENULLARG if null arguments were provided.
*/
extern cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);
/**
* @brief Get the file descriptor of a file map.
*
* The file descriptor is used to access the file represented by the file map.
* If the file map was created from a file descriptor, this will return that
* descriptor. Otherwise, it will return -1.
*
* If you need a temp file for each file extracted, you can use ClamAV's
* force-to-disk feature. Note: There is no option to force-to-memory.
* Many of the parser modules will create temporary files no matter what.
*
* Don't close this file descriptor. Don't dup it either.
*
* @param map The file map to query.
* @param[out] fd_out Pointer to a variable to receive the file descriptor.
* @param[out] offset_out (optional) Pointer to a variable to receive the offset of the current layer within the given file.
* @param[out] len_out (optional) Pointer to a variable to receive the length of the current layer within the given file.
* @return cl_error_t CL_SUCCESS if the file descriptor was successfully retrieved.
* CL_EACCES if the map does not have a file descriptor.
* CL_ENULLARG if null arguments were provided.
*/
extern cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);
/**
* @brief Get the file size of the current layer from a file map.
*
* This function retrieves the size of the file represented by the file map.
* If the file map was created from a handle, it will use the pread callback
* to determine the size.
*
* @param map The file map to query.
* @param[out] size_out Pointer to a variable to receive the size of the file.
* @return cl_error_t CL_SUCCESS if the file size was successfully retrieved.
*/
extern cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);
/**
* @brief Set the file hash for a file map.
*
* This function sets the hash of the file represented by the file map.
* If a hash of the requested type has already been calculated, it will be
* overwritten with the new value.
*
* @param map The file map to modify.
* @param hash_alg The hash algorithm to use (e.g., "md5", "sha1", "sha2-256").
* @param hash The hash value to set.
* @return cl_error_t CL_SUCCESS if the hash was successfully set.
*/
extern cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);
/**
* @brief Check if we already calculated a file hash of a specific type.
*
* This function checks if the file represented by the file map has a hash
* of the requested type already calculated.
*
* @param map A file map.
* @param hash_alg The hash algorithm to check (e.g., "md5", "sha1", "sha2-256").
* @param[out] have_hash_out Pointer to a boolean that will be set to true if the hash exists, false otherwise.
* @return cl_error_t CL_SUCCESS if the check was successful.
*/
extern cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);
/**
* @brief Indicate that we will need a file hash of a specific type later.
*
* This function indicates that we will need the file hash of the requested
* type in the future. The next time a hash is calculated, (e.g. when you run
* `cl_fmap_get_hash()`, it will also calculate the requested hash type.
*
* This is an optimization to avoid iterating over the file contents multiple times
* which might happen if you call `cl_fmap_get_hash()` without calling this first.
*
* @param map A file map.
* @param hash_alg The hash algorithm to indicate (e.g., "md5", "sha1", "sha2-256").
* @return cl_error_t CL_SUCCESS if the indication was successful.
*/
extern cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);
/**
* @brief Get the file hash from a file map.
*
* This function retrieves the hash of the file represented by the file map.
* If a hash of the requested type has NOT already been calculated then it will
* be calculated when you call this function, and stored for future use.
*
* You are responsible for freeing the hash string when you're done with it.
*
* @param map A file map.
* @param hash_alg The hash algorithm to use (e.g., "md5", "sha1", "sha2-256").
* @param[out] hash_out Malloced string containing the hash value.
* @return cl_error_t CL_SUCCESS if the hash was successfully retrieved.
*/
extern cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, char **hash_out);
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
/**
* @brief Get the file contents from a file map.
*
* This function will get you a pointer to the contents of the file represented
* by the file map. This is read-only and is not malloced.
* If you want a copy to keep, you must copy it yourself.
*
* Use the offset and length parameters to specify the range of the file you
* want to retrieve. If you don't need the whole file, don't ask for the whole file.
*
* @param map A file map.
* @param offset The offset in the file from which to access.
* @param len The length of the data to read.
* If 0, then the rest of the file will be provided.
* @param[out] data_out A valid pointer to data file contents. Will be NULL if failed.
* @param[out] data_len_out The length of the file contents. May be less than requested if the end of file is reached.
* @return cl_error_t CL_SUCCESS if the contents were successfully retrieved.
*/
extern cl_error_t cl_fmap_get_data(
const cl_fmap_t *map,
size_t offset,
size_t len,
const uint8_t **data_out,
size_t *data_len_out);
/**
* @brief Releases resources associated with the map.
*
* You must release any resources you hold only after (handles, maps) calling
* this function.
*
* @param map Map to be closed.
*/
extern void cl_fmap_close(cl_fmap_t *);
/* ----------------------------------------------------------------------------
* ClamAV Recursive Scan Layer Public API.
*/
struct cl_scan_layer;
typedef struct cl_scan_layer cl_scan_layer_t;
/**
* @brief Get the file map associated with a scan layer.
*
* @param layer The scan layer to query.
* @param[out] fmap_out Pointer to a variable to receive the file map.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_fmap(
cl_scan_layer_t *layer,
cl_fmap_t **fmap_out);
/**
* @brief Get the parent layer of a scan layer.
*
* You may use this in a loop/recursively to walk the scan layers.
* Also consider using `cl_scan_layer_get_recursion_level()` to determine the
* depth of the current layer.
*
* @param layer The scan layer to query.
* @param[out] parent_layer_out Pointer to a variable to receive the parent layer.
* Will be NULL if the layer has no parent.
* For example, the root layer has no parent.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_parent_layer(
cl_scan_layer_t *layer,
cl_scan_layer_t **parent_layer_out);
/**
* @brief Get the file type of a scan layer.
*
* The file type as clamav currently believes it to be.
* It may change later in the scan, so consider using `clcb_file_type_correction`
* callback to access the file again if it is re-typed.
*
* @param layer The scan layer to query.
* @param[out] type_out Pointer to a variable to receive the file type.
* This is a static reference and must not be freed.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_type(
cl_scan_layer_t *layer,
const char **type_out);
/**
* @brief Get the recursion level of a scan layer.
*
* @param layer The scan layer to query.
* @param[out] recursion_level_out Pointer to a variable to receive the recursion level.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_recursion_level(
cl_scan_layer_t *layer,
uint32_t *recursion_level_out);
/**
* @brief Get the object ID of a scan layer.
*
* Object ID is a unique identifier for the scan layer. It counts up from 0, although the callback interface
* may skip some IDs if the scan layer is processed immediately rather than being handled as distinct file type.
* For example, HTML may be normalized several ways and they're each given an Object ID, but we immediately
* pattern match them and do not handle them as distinct file types that were contained within the HTML.
*
* @param layer The scan layer to query.
* @param[out] object_id_out Pointer to a variable to receive the object ID.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_object_id(
cl_scan_layer_t *layer,
uint64_t *object_id_out);
/**
* @brief Get the last detected alert name from a scan layer.
*
* Do not free the alert name, and make a copy if you need one.
*
* @param layer The scan layer to query.
* @param[out] alert_name_out Pointer to a variable to receive the alert name.
* If the layer has no alerts, this will be set to NULL.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_last_alert(
cl_scan_layer_t *layer,
const char **alert_name_out);
/*
* Attributes of each layer in scan.
*/
#define LAYER_ATTRIBUTES_NONE 0x0 /**< No attributes set. */
#define LAYER_ATTRIBUTES_NORMALIZED 0x1 /**< This layer was modified to make matching more generic, reliable. */
#define LAYER_ATTRIBUTES_DECRYPTED 0x2 /**< Decryption was used to extract this layer. \
* E.g. ClamAV was able to decrypt an encrypted file entry in the parent layer. */
#define LAYER_ATTRIBUTES_RETYPED 0x4 /**< This layer is a re-scan of the parent layer but as a new type (e.g. using HandlerType). */
#define LAYER_ATTRIBUTES_EMBEDDED 0x8 /**< This layer was found within the parent layer using FTM signatures \
* (e.g. like ZIP entries found within an executable). */
/**
* @brief Get the attributes of a scan layer.
*
* @param layer The scan layer to query.
* @param[out] attributes_out Pointer to a variable to receive the layer attributes.
* @return cl_error_t CL_SUCCESS if successful.
*/
extern cl_error_t cl_scan_layer_get_attributes(
cl_scan_layer_t *layer,
uint32_t *attributes_out);
/* ----------------------------------------------------------------------------
* Callback function type definitions.
*/
2011-06-14 17:00:06 +02:00
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
typedef enum scan_callback {
/** Pre-hash
*
* Occurs just after basic file-type detection and before any hashes have been calculated either for the cache or
* the gen-json metadata.
* If you want any hashes other than SHA2-256, the most efficient option is to run the
* `cl_fmap_will_need_hash_later()` function in this callback for each and then run the `cl_fmap_get_hash()`
* function afterwards (now or later) to gather the hashes.
*
* Using this callback will appear in-order for ClamAV's depth-first approach.
*/
CL_SCAN_CALLBACK_PRE_HASH,
/** Pre-scan
*
* Occurs before parser modules run and before pattern matching.
*
* Using this callback will appear in-order for ClamAV's depth-first approach.
*/
CL_SCAN_CALLBACK_PRE_SCAN,
/** Post-scan
*
* Occurs after pattern matching and after running parser modules (i.e. scan complete for this layer).
* This callback is useful for post-processing the results of the scan, but is the most likely of the callbacks to
* be skipped if something goes critically wrong or the hash for the layer appeared in the clean-cache.
*
* Using this callback will appear in REVERSE-order for ClamAV's depth-first approach.
*/
CL_SCAN_CALLBACK_POST_SCAN,
/** Alert
*
* Occurs each time an alert (detection) would be triggered during a scan.
* In all-match mode, you may receive multiple alerts for the same file, and even the same layer, corresponding with
* each signature that matched.
*/
CL_SCAN_CALLBACK_ALERT,
/** File type
*
* Occurs each time the file type determination is refined.
* This may happen more than once per LAYER!
*
* Outside of using this callback, The most accurate time to check the file type would be:
* - For each layer: Use `cl_scan_layer_get_type()` in the `CL_SCAN_CALLBACK_POST_SCAN` callback.
* - For just the top layer: Use the `file_type_out` parameter provided by the `cl_scan*ex()` functions.
*/
CL_SCAN_CALLBACK_FILE_TYPE
} cl_scan_callback_t;
/**
* @brief Callback interface to get access to the current layer using the scan-
* layer abstraction. This grants access to file content and attributes as well
* as those of each ancestor layers (if any).
*
* Called for each processed file including both the top level file (i.e. the
* zeroeth layer) and all contained files (recursively).
*
* @param layer Scan layer (abstraction) for the current layer being scanned.
* Use the `cl_scan_layer_*` functions to access layer data and metadata.
* You may want to use `cl_scan_layer_get_fmap()` to get the file map for the current layer.
* You may also use it to access ancestor layers using `cl_scan_layer_get_parent_layer()`.
*
* @param context The application context pointer passed in to the `cl_scan*()` function.
*
* @return CL_BREAK
*
* Scan aborted by callback (the rest of the scan is skipped).
* This does not mark the file as clean or infected, it just skips the rest of the scan.
*
* @return CL_SUCCESS / CL_CLEAN
*
* File scan will continue.
*
* For CL_SCAN_CALLBACK_ALERT: Means you want to ignore this specific alert and keep scanning.
* This is different than CL_VERIFIED because it does not affect prior or future alerts.
* Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip
* the rest of the scan for this layer.
*
* @return CL_VIRUS
*
* This will mark the file as infected. A new alert will be added.
*
* For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed).
* Remember that CL_SUCCESS means you want to ignore the alert.
*
* @return CL_VERIFIED
*
* Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer.
* You might want to do this if you trust the hash or verified a digital signature.
* The rest of the scan will be skipped FOR THIS layer.
* For contained files, this does NOT mean that the parent or adjacent layers are trusted.
*/
typedef cl_error_t (*clcb_scan)(cl_scan_layer_t *layer, void *context);
/**
* @brief Set a callback function using the clcb_scan callback type.
*
* Select the callback location using the `cl_scan_callback_t` enum.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
* @param location The location of the callback.
*/
extern void cl_engine_set_scan_callback(struct cl_engine *engine, clcb_scan callback, cl_scan_callback_t location);
/**
* @brief Pre-cache callback.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_PRE_HASH` with `cl_engine_set_scan_callback()` instead.
*
* Called for each processed file (both the entry level - AKA 'outer' - file and
* inner files - those generated when processing archive and container files), before
* the actual scanning takes place.
*
* @param fd File descriptor which is about to be scanned.
* @param type File type detected via magic - i.e. NOT on the fly - (e.g. "CL_TYPE_MSEXE").
* @param context Opaque application provided data.
* @return CL_CLEAN = File is scanned.
* @return CL_BREAK = Allowed by callback - file is skipped and marked as clean.
* @return CL_VIRUS = Blocked by callback - file is skipped and marked as infected.
*/
2011-06-14 17:00:06 +02:00
typedef cl_error_t (*clcb_pre_cache)(int fd, const char *type, void *context);
/**
* @brief Set a custom pre-cache callback function.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_PRE_HASH` with `cl_engine_set_scan_callback()` instead.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
2011-06-15 04:50:54 +02:00
extern void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);
2011-06-14 17:00:06 +02:00
/**
* @brief File inspection callback.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_PRE_SCAN` with `cl_engine_set_scan_callback()` instead.
*
* You can use the `cl_fmap_*` functions to access the file map, file name, file size, file contents,
* and those of each ancestor layers (if any).
*
* Called for each NEW file (inner and outer).
* Provides capability to record embedded file information during a scan.
*
* @param fd Current file descriptor which is about to be scanned.
* @param type Current file type detected via magic - i.e. NOT on the fly - (e.g. "CL_TYPE_MSEXE").
* @param ancestors An array of ancestors filenames of size `recursion_level`. filenames may be NULL.
* @param parent_file_size Parent file size.
* @param file_name Current file name, or NULL if the file does not have a name or ClamAV failed to record the name.
* @param file_size Current file size.
* @param file_buffer Current file buffer pointer.
* @param recursion_level Recursion level / depth of the current file.
* @param layer_attributes See LAYER_ATTRIBUTES_* flags.
* @param context Opaque application provided data.
* @return CL_CLEAN = File is scanned.
* @return CL_BREAK = Whitelisted by callback - file is skipped and marked as clean.
* @return CL_VIRUS = Blacklisted by callback - file is skipped and marked as infected.
*/
typedef cl_error_t (*clcb_file_inspection)(
int fd,
const char *type,
const char **ancestors,
size_t parent_file_size,
const char *file_name,
size_t file_size,
const char *file_buffer,
uint32_t recursion_level,
uint32_t layer_attributes,
void *context);
/**
* @brief Set a custom file inspection callback function.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_PRE_SCAN` with `cl_engine_set_scan_callback()` instead.
*
* DISCLAIMER: This interface is to be considered unstable while we continue to evaluate it.
* We may change this interface in the future.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);
/**
* @brief Pre-scan callback.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
*
* Called for each NEW file (inner and outer) before the scanning takes place. This is
2023-11-26 15:01:19 -08:00
* roughly the same as clcb_before_cache, but it is affected by clean file caching.
* This means that it won't be called if a clean cached file (inner or outer) is
* scanned a second time.
*
* @param fd File descriptor which is about to be scanned.
* @param type File type detected via magic - i.e. NOT on the fly - (e.g. "CL_TYPE_MSEXE").
* @param context Opaque application provided data.
* @return CL_CLEAN = File is scanned.
* @return CL_BREAK = Allowed by callback - file is skipped and marked as clean.
* @return CL_VIRUS = Blocked by callback - file is skipped and marked as infected.
*/
2011-06-14 17:00:06 +02:00
typedef cl_error_t (*clcb_pre_scan)(int fd, const char *type, void *context);
/**
* @brief Set a custom pre-scan callback function.
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* Use `CL_SCAN_CALLBACK_PRE_SCAN` with `cl_engine_set_scan_callback()` instead.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
2011-06-14 17:00:06 +02:00
extern void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);
2010-06-22 15:41:19 +02:00
/**
* @brief Post-scan callback.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_PRE_SCAN` with `cl_engine_set_scan_callback()` instead.
*
* Called for each processed file (inner and outer), after the scanning is complete.
* In all-match mode, the virname will be one of the matches, but there is no
* guarantee in which order the matches will occur, thus the final virname may
* be any one of the matches.
*
* @param fd File descriptor which was scanned.
* @param result The scan result for the file.
* @param virname A signature name if there was one or more matches.
* @param context Opaque application provided data.
* @return Scan result is not overridden.
* @return CL_BREAK = Allowed by callback - scan result is set to CL_CLEAN.
* @return Blocked by callback - scan result is set to CL_VIRUS.
*/
2010-07-07 03:01:55 +02:00
typedef cl_error_t (*clcb_post_scan)(int fd, int result, const char *virname, void *context);
/**
* @brief Set a custom post-scan callback function.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
2010-07-07 03:01:55 +02:00
extern void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);
2010-06-22 15:41:19 +02:00
/**
* @brief Virus-found callback.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_ALERT` with `cl_engine_set_scan_callback()` instead.
*
* Called for each signature match.
* If all-match is enabled, clcb_virus_found() may be called multiple times per
* scan.
*
* In addition, clcb_virus_found() does not have a return value and thus.
* can not be used to ignore the match.
*
* @param fd File descriptor which was scanned.
* @param virname Virus name.
* @param context Opaque application provided data.
*/
typedef void (*clcb_virus_found)(int fd, const char *virname, void *context);
/**
* @brief Set a custom virus-found callback function.
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* Use `CL_SCAN_CALLBACK_ALERT` with `cl_engine_set_scan_callback()` instead.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);
2010-06-22 15:41:19 +02:00
/**
* @brief Signature-load callback.
*
* May be used to ignore signatures at database load time.
*
* WARNING: Some signatures (notably ldb, cbc) can be dependent upon other signatures.
* Failure to preserve dependency chains will result in database loading failure.
* It is the implementor's responsibility to guarantee consistency.
*
* @param type The signature type (e.g. "db", "ndb", "mdb", etc.)
* @param name Signature name.
* @param custom The signature is official (custom == 0) or custom (custom != 0)
* @param context Opaque application provided data
* @return 0 to load the current signature.
* @return Non-0 to skip the current signature.
*/
typedef int (*clcb_sigload)(const char *type, const char *name, unsigned int custom, void *context);
/**
* @brief Set a custom signature-load callback function.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
* @param context Opaque application provided data.
*/
2010-06-22 15:41:19 +02:00
extern void cl_engine_set_clcb_sigload(struct cl_engine *engine, clcb_sigload callback, void *context);
enum cl_msg {
/* leave room for more message levels in the future */
CL_MSG_INFO_VERBOSE = 32, /* verbose */
CL_MSG_WARN = 64, /* LibClamAV WARNING: */
CL_MSG_ERROR = 128 /* LibClamAV ERROR: */
};
/**
* @brief Progress callback for sig-load, engine-compile, and engine-free.
*
* Progress is complete when total_items == now_completed.
*
* Note: The callback should return CL_SUCCESS. We reserve the right to have it
* cancel the operation in the future if you return something else...
* ... but for now, the return value will be ignored.
*
* @param total_items Total number of items
* @param now_completed Number of items completed
* @param context Opaque application provided data
* @return cl_error_t reserved for future use
*/
typedef cl_error_t (*clcb_progress)(size_t total_items, size_t now_completed, void *context);
/**
* @brief Set a progress callback function to be called incrementally during a
* database load.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine
* @param callback The callback function pointer
* @param context Opaque application provided data
*/
extern void cl_engine_set_clcb_sigload_progress(struct cl_engine *engine, clcb_progress callback, void *context);
/**
* @brief Set a progress callback function to be called incrementally during an
* engine compile.
*
* Disclaimer: the number of items for this is a rough estimate of the items that
* tend to take longest to compile and doesn't represent an accurate number of
* things compiled.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine
* @param callback The callback function pointer
* @param context Opaque application provided data
*/
extern void cl_engine_set_clcb_engine_compile_progress(struct cl_engine *engine, clcb_progress callback, void *context);
/**
* @brief Set a progress callback function to be called incrementally during an
* engine free (if the engine is in fact freed).
*
* Disclaimer: the number of items for this is a rough estimate of the items that
* tend to take longest to free and doesn't represent an accurate number of
* things freed.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine
* @param callback The callback function pointer
* @param context Opaque application provided data
*/
extern void cl_engine_set_clcb_engine_free_progress(struct cl_engine *engine, clcb_progress callback, void *context);
/**
* @brief Logging message callback for info, warning, and error messages.
*
* The specified callback will be called instead of logging to stderr.
* Messages of lower severity than specified are logged as usual.
*
* Callback may be used to silence logging by assigning a do-nothing function.
* Does not affect debug log messages.
*
* Just like with cl_debug() this must be called before going multithreaded.
* Callable before cl_init, if you want to log messages from cl_init() itself.
*
* You can use context of cl_scandesc_callback to convey more information to
* the callback (such as the filename!).
*
* Note: setting a 2nd callbacks overwrites previous, multiple callbacks are not
* supported.
*
* @param severity Message severity (CL_MSG_INFO_VERBOSE, CL_MSG_WARN, or CL_MSG_ERROR).
* @param fullmsg The log message including the "LibClamAV <severity>: " prefix.
* @param msg The log message.
* @param context Opaque application provided data.
*/
typedef void (*clcb_msg)(enum cl_msg severity, const char *fullmsg, const char *msg, void *context);
/**
* @brief Set a custom logging message callback function for all of libclamav.
*
* @param callback The callback function pointer.
*/
extern void cl_set_clcb_msg(clcb_msg callback);
/**
* @brief LibClamAV hash stats callback.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_ALERT` with `cl_engine_set_scan_callback()` instead.
* Then use the `cl_scan_layer_get_fmap()` and `cl_fmap_get_hash()` functions.
*
* Callback that provides the hash of a scanned sample if a signature alerted.
* Provides a mechanism to record detection statistics.
*
* @param fd File descriptor if available, else -1.
* @param size Sample size
Swap clean cache from MD5 to SHA2-256 Change the clean-cache to use SHA2-256 instead of MD5. Note that all references are changed to specify "SHA2-256" now instead of "SHA256", for clarity. But there is no plan to add support for SHA3 algorithms at this time. Significant code cleanup. E.g.: - Implemented goto-done error handling. - Used `uint8_t *` instead of `unsigned char *`. - Use `bool` for boolean checks, rather than `int. - Used `#defines` instead of magic numbers. - Removed duplicate `#defines` for things like hash length. Add new option to calculate and record additional hash types when the "generate metadata JSON" feature is enabled: - libclamav option: `CL_SCAN_GENERAL_STORE_EXTRA_HASHES` - clamscan option: `--json-store-extra-hashes` (default off) - clamd.conf option: `JsonStoreExtraHashes` (default 'no') Renamed the sigtool option `--sha256` to `--sha2-256`. The original option is still functional, but is deprecated. For the "generate metadata JSON" feature, the file hash is now stored as "sha2-256" instead of "FileMD5". If you enable the "extra hashes" option, then it will also record "md5" and "sha1". Deprecate and disable the internal "SHA collect" feature. This option had been hidden behind C #ifdef checks for an option that wasn't exposed through CMake, so it was basically unavailable anyways. Changes to calculate file hashes when they're needed and no sooner. For the FP feature in the matcher module, I have mimiced the optimization in the FMAP scan routine which makes it so that it can calculate multiple hashes in a single pass of the file. The `HandlerType` feature stores a hash of the file in the scan ctx to prevent retyping the exact same data more than once. I removed that hash field and replaced it with an attribute flag that is applied to the new recursion stack layer when retyping a file. This also closes a minor bug that would prevent retyping a file with an all-zero hash. :) The work upgrading cache.c to support SHA2-256 sized hashes thanks to: https://github.com/m-sola CLAM-255 CLAM-1858 CLAM-1859 CLAM-1860
2025-06-03 19:03:20 -04:00
* @param md5 Sample md5 hash (string)
* @param virname Signature name that the sample matched against
* @param context Opaque application provided data
*/
Swap clean cache from MD5 to SHA2-256 Change the clean-cache to use SHA2-256 instead of MD5. Note that all references are changed to specify "SHA2-256" now instead of "SHA256", for clarity. But there is no plan to add support for SHA3 algorithms at this time. Significant code cleanup. E.g.: - Implemented goto-done error handling. - Used `uint8_t *` instead of `unsigned char *`. - Use `bool` for boolean checks, rather than `int. - Used `#defines` instead of magic numbers. - Removed duplicate `#defines` for things like hash length. Add new option to calculate and record additional hash types when the "generate metadata JSON" feature is enabled: - libclamav option: `CL_SCAN_GENERAL_STORE_EXTRA_HASHES` - clamscan option: `--json-store-extra-hashes` (default off) - clamd.conf option: `JsonStoreExtraHashes` (default 'no') Renamed the sigtool option `--sha256` to `--sha2-256`. The original option is still functional, but is deprecated. For the "generate metadata JSON" feature, the file hash is now stored as "sha2-256" instead of "FileMD5". If you enable the "extra hashes" option, then it will also record "md5" and "sha1". Deprecate and disable the internal "SHA collect" feature. This option had been hidden behind C #ifdef checks for an option that wasn't exposed through CMake, so it was basically unavailable anyways. Changes to calculate file hashes when they're needed and no sooner. For the FP feature in the matcher module, I have mimiced the optimization in the FMAP scan routine which makes it so that it can calculate multiple hashes in a single pass of the file. The `HandlerType` feature stores a hash of the file in the scan ctx to prevent retyping the exact same data more than once. I removed that hash field and replaced it with an attribute flag that is applied to the new recursion stack layer when retyping a file. This also closes a minor bug that would prevent retyping a file with an all-zero hash. :) The work upgrading cache.c to support SHA2-256 sized hashes thanks to: https://github.com/m-sola CLAM-255 CLAM-1858 CLAM-1859 CLAM-1860
2025-06-03 19:03:20 -04:00
typedef void (*clcb_hash)(int fd, unsigned long long size, const char *md5, const char *virname, void *context);
/**
* @brief Set a custom hash stats callback function.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `CL_SCAN_CALLBACK_ALERT` with `cl_engine_set_scan_callback()` instead.
* Then use the `cl_scan_layer_get_fmap()` and `cl_fmap_get_hash()` functions.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);
2010-06-22 15:41:19 +02:00
/**
* @brief Archive meta matching callback function.
*
* May be used to block archive/container samples based on archive metadata.
* Function is invoked multiple times per archive. Typically once per contained file.
*
* Note: Used by the --archive-verbose clamscan option. Overriding this will alter
* the output from --archive-verbose.
*
* @param container_type String name of type (CL_TYPE).
* @param fsize_container Sample size
* @param filename Filename associated with the data in archive.
* @param fsize_real Size of file after decompression (according to the archive).
* @param is_encrypted Boolean non-zero if the contained file is encrypted.
* @param filepos_container File index in container.
* @param context Opaque application provided data.
* @return CL_VIRUS to block (alert on)
* @return CL_CLEAN to continue scanning
*/
typedef cl_error_t (*clcb_meta)(
const char *container_type,
unsigned long fsize_container,
const char *filename,
unsigned long fsize_real,
int is_encrypted,
unsigned int filepos_container,
void *context);
/**
* @brief Set a custom archive metadata matching callback function.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
2011-08-22 15:22:55 +03:00
extern void cl_engine_set_clcb_meta(struct cl_engine *engine, clcb_meta callback);
/**
* @brief File properties callback function.
*
* Invoked after a scan the CL_SCAN_GENERAL_COLLECT_METADATA general scan option
* is enabled and libclamav was built with json support.
*
* @param j_propstr File properties/metadata in a JSON encoded string.
* @param rc The cl_error_t return code from the scan.
* @param cbdata Opaque application provided data.
*/
typedef int (*clcb_file_props)(const char *j_propstr, int rc, void *cbdata);
/**
* @brief Set a custom file properties callback function.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_file_props(struct cl_engine *engine, clcb_file_props callback);
/**
* @brief generic data callback function.
*
* Callback handler prototype for callbacks passing back data and application context.
*
* @param data A pointer to some data. Should be treated as read-only and may be freed after callback.
* @param data_len The length of data.
* @param cbdata Opaque application provided data.
*/
typedef int (*clcb_generic_data)(const unsigned char *const data, const size_t data_len, void *cbdata);
/**
* @brief Set a custom VBA macro callback function.
*
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_vba(struct cl_engine *engine, clcb_generic_data callback);
/* ----------------------------------------------------------------------------
* Statistics/telemetry gathering callbacks.
*
* The statistics callback functions may be used to implement a telemetry
* gathering feature.
*
* The structure definition for `cbdata` is entirely up to the caller, as are
* the implementations of each of the callback functions defined below.
*/
/**
* @brief Set a pointer the caller-defined cbdata structure.
*
* The data must persist at least until `clcb_stats_submit()` is called, or
* `clcb_stats_flush()` is called (optional).
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The scanning engine.
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
extern void cl_engine_set_stats_set_cbdata(struct cl_engine *engine, void *cbdata);
/**
* @brief Add sample metadata to the statistics for a sample that matched on a signature.
*
* @param virname Name of the signature that matched.
* @param md5 Sample hash.
* @param size Sample size.
* @param sections PE section data, if applicable.
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef void (*clcb_stats_add_sample)(
const char *virname,
const unsigned char *md5,
size_t size,
stats_section_t *sections,
void *cbdata);
/**
* @brief Set a custom callback function to add sample metadata to a statistics report.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_add_sample(struct cl_engine *engine, clcb_stats_add_sample callback);
/**
* @brief Remove a specific sample from the statistics report.
*
* @param virname Name of the signature that matched.
* @param md5 Sample hash.
* @param size Sample size.
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef void (*clcb_stats_remove_sample)(const char *virname, const unsigned char *md5, size_t size, void *cbdata);
/**
* @brief Set a custom callback function to remove sample metadata from a statistics report.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_remove_sample(struct cl_engine *engine, clcb_stats_remove_sample callback);
/**
* @brief Decrement the hit count listed in the statistics report for a specific sample.
*
* @param virname Name of the signature that matched.
* @param md5 Sample hash.
* @param size Sample size.
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef void (*clcb_stats_decrement_count)(const char *virname, const unsigned char *md5, size_t size, void *cbdata);
/**
* @brief Set a custom callback function to decrement the hit count listed in the statistics report for a specific sample.
*
* This function may remove the sample from the report if the hit count is decremented to 0.
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_decrement_count(struct cl_engine *engine, clcb_stats_decrement_count callback);
/**
* @brief Function to submit a statistics report.
*
* @param engine The initialized scanning engine.
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef void (*clcb_stats_submit)(struct cl_engine *engine, void *cbdata);
/**
* @brief Set a custom callback function to submit the statistics report.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_submit(struct cl_engine *engine, clcb_stats_submit callback);
/**
* @brief Function to flush/free the statistics report data.
*
* @param engine The initialized scanning engine.
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef void (*clcb_stats_flush)(struct cl_engine *engine, void *cbdata);
/**
* @brief Set a custom callback function to flush/free the statistics report data.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_flush(struct cl_engine *engine, clcb_stats_flush callback);
/**
* @brief Function to get the number of samples listed in the statistics report.
*
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef size_t (*clcb_stats_get_num)(void *cbdata);
/**
* @brief Set a custom callback function to get the number of samples listed in the statistics report.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_get_num(struct cl_engine *engine, clcb_stats_get_num callback);
/**
* @brief Function to get the size of memory used to store the statistics report.
*
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef size_t (*clcb_stats_get_size)(void *cbdata);
/**
* @brief Set a custom callback function to get the size of memory used to store the statistics report.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_get_size(struct cl_engine *engine, clcb_stats_get_size callback);
/**
* @brief Function to get the machine's unique host ID.
*
* @param cbdata The statistics data. Probably a pointer to a malloc'd struct.
*/
typedef char *(*clcb_stats_get_hostid)(void *cbdata);
/**
* @brief Set a custom callback function to get the machine's unique host ID.
*
Record names of extracted files A way is needed to record scanned file names for two purposes: 1. File names (and extensions) must be stored in the json metadata properties recorded when using the --gen-json clamscan option. Future work may use this to compare file extensions with detected file types. 2. File names are useful when interpretting tmp directory output when using the --leave-temps option. This commit enables file name retention for later use by storing file names in the fmap header structure, if a file name exists. To store the names in fmaps, an optional name argument has been added to any internal scan API's that create fmaps and every call to these APIs has been modified to pass a file name or NULL if a file name is not required. The zip and gpt parsers required some modification to record file names. The NSIS and XAR parsers fail to collect file names at all and will require future work to support file name extraction. Also: - Added recursive extraction to the tmp directory when the --leave-temps option is enabled. When not enabled, the tmp directory structure remains flat so as to prevent the likelihood of exceeding MAX_PATH. The current tmp directory is stored in the scan context. - Made the cli_scanfile() internal API non-static and added it to scanners.h so it would be accessible outside of scanners.c in order to remove code duplication within libmspack.c. - Added function comments to scanners.h and matcher.h - Converted a TDB-type macros and LSIG-type macros to enums for improved type safey. - Converted more return status variables from `int` to `cl_error_t` for improved type safety, and corrected ooxml file typing functions so they use `cli_file_t` exclusively rather than mixing types with `cl_error_t`. - Restructured the magic_scandesc() function to use goto's for error handling and removed the early_ret_from_magicscan() macro and magic_scandesc_cleanup() function. This makes the code easier to read and made it easier to add the recursive tmp directory cleanup to magic_scandesc(). - Corrected zip, egg, rar filename extraction issues. - Removed use of extra sub-directory layer for zip, egg, and rar file extraction. For Zip, this also involved changing the extracted filenames to be randomly generated rather than using the "zip.###" file name scheme.
2020-03-19 21:23:54 -04:00
* Caution: changing options for an engine that is in-use is not thread-safe!
*
* @param engine The initialized scanning engine.
* @param callback The callback function pointer.
*/
extern void cl_engine_set_clcb_stats_get_hostid(struct cl_engine *engine, clcb_stats_get_hostid callback);
/**
* @brief Function enables the built-in statistics reporting feature.
*
* @param engine The initialized scanning engine.
*/
extern void cl_engine_stats_enable(struct cl_engine *engine);
/* ----------------------------------------------------------------------------
* File scanning.
*/
/**
* @brief Scan a file, given a file descriptor.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release. Use `cl_scandesc_ex()` instead.
*
* @param desc File descriptor of an open file. The caller must provide this or the map.
* @param filename (optional) Filepath of the open file descriptor or file map.
* @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
* @param[out] scanned The number of bytes scanned / CL_COUNT_PRECISION.
* @param engine The scanning engine.
* @param scanoptions Scanning options.
* @return cl_error_t CL_CLEAN, CL_VIRUS, or an error code if an error occurred during the scan.
*/
extern cl_error_t cl_scandesc(
int desc,
const char *filename,
const char **virname,
unsigned long int *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions);
/**
* @brief Scan a file, given a file descriptor.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release. Use `cl_scandesc_ex()` instead.
*
* This callback variant allows the caller to provide a context structure that
* caller provided callback functions can interpret.
*
* @param desc File descriptor of an open file. The caller must provide this or the map.
* @param filename (optional) Filepath of the open file descriptor or file map.
* @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
* @param[out] scanned The number of bytes scanned / CL_COUNT_PRECISION.
* @param engine The scanning engine.
* @param scanoptions Scanning options.
* @param[in,out] context (Optional) An application-defined context struct, opaque to libclamav.
* May be used within your callback functions.
* @return cl_error_t CL_CLEAN, CL_VIRUS, or an error code if an error occurred during the scan.
*/
extern cl_error_t cl_scandesc_callback(
int desc,
const char *filename,
const char **virname,
unsigned long int *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions,
void *context);
/**
* @brief Scan a file, given a file descriptor.
*
* This callback variant allows the caller to provide a context structure that
* caller provided callback functions can interpret.
*
* This extended version of cl_scanmap_callback allows the caller to provide
* additional hints to the scanning engine, such as a file hash and file type.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* This variant also upgrades the `scanned` output parameter to a 64-bit integer.
*
* @param desc File descriptor of an open file. The caller must provide this or the map.
* @param filename (optional) Filepath of the open file descriptor or file map.
* @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
* @param[out] scanned The (exact) number of bytes scanned.
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @param engine The scanning engine.
* @param scanoptions Scanning options.
* @param[in,out] context (Optional) An application-defined context struct, opaque to libclamav.
* May be used within your callback functions.
* @param hash_hint (Optional) A NULL terminated string of the file hash so that
* libclamav does not need to calculate it.
* @param[out] hash_out (Optional) A NULL terminated string of the file hash.
* The caller is responsible for freeing the string.
* @param hash_alg The hashing algorithm used for either `hash_hint` or `hash_out`.
* Supported algorithms are "md5", "sha1", "sha2-256".
* If not specified, the default is "sha2-256".
* @param file_type_hint (Optional) A NULL terminated string of the file type hint.
* E.g. "pe", "elf", "zip", etc.
* You may also use ClamAV type names such as "CL_TYPE_PE".
* ClamAV will ignore the hint if it is not familiar with the specified type.
* See also: https://docs.clamav.net/appendix/FileTypes.html#file-types
* @param[out] file_type_out (Optional) A NULL terminated string of the file type
* of the top layer as determined by ClamAV.
* Will take the form of the standard ClamAV file type format. E.g. "CL_TYPE_PE".
* See also: https://docs.clamav.net/appendix/FileTypes.html#file-types
* @return cl_error_t CL_CLEAN if no signature matched.
* CL_VIRUS if a signature matched.
* Another CL_E* error code if an error occurred.
*/
extern cl_error_t cl_scandesc_ex(
int desc,
const char *filename,
const char **virname,
uint64_t *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions,
void *context,
const char *hash_hint,
char **hash_out,
const char *hash_alg,
const char *file_type_hint,
char **file_type_out);
/**
* @brief Scan a file, given a filename.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release. Use `cl_scanfile_ex()` instead.
*
* @param filename Filepath of the file to be scanned.
* @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
* @param[out] scanned The number of bytes scanned / CL_COUNT_PRECISION.
* @param engine The scanning engine.
* @param scanoptions Scanning options.
* @return cl_error_t CL_CLEAN, CL_VIRUS, or an error code if an error occurred during the scan.
*/
extern cl_error_t cl_scanfile(
const char *filename,
const char **virname,
unsigned long int *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions);
/**
* @brief Scan a file, given a filename.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release. Use `cl_scanfile_ex()` instead.
*
* This callback variant allows the caller to provide a context structure that
* caller provided callback functions can interpret.
*
* @param filename Filepath of the file to be scanned.
* @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
* @param[out] scanned The number of bytes scanned / CL_COUNT_PRECISION.
* @param engine The scanning engine.
* @param scanoptions Scanning options.
* @param[in,out] context (Optional) An application-defined context struct, opaque to libclamav.
* May be used within your callback functions.
* @return cl_error_t CL_CLEAN, CL_VIRUS, or an error code if an error occurred during the scan.
*/
extern cl_error_t cl_scanfile_callback(
const char *filename,
const char **virname,
unsigned long int *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions,
void *context);
/**
* @brief Scan a file, given a filename.
*
* This callback variant allows the caller to provide a context structure that
* caller provided callback functions can interpret.
*
* This extended version of cl_scanmap_callback allows the caller to provide
* additional hints to the scanning engine, such as a file hash and file type.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* This variant also upgrades the `scanned` output parameter to a 64-bit integer.
*
* @param filename Filepath of the file to be scanned.
* @param[out] virname Will be set to a statically allocated (i.e. needs not be freed) signature name if the scan matches against a signature.
* @param[out] scanned The exact number of bytes scanned.
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @param engine The scanning engine.
* @param scanoptions Scanning options.
* @param[in,out] context (Optional) An application-defined context struct, opaque to libclamav.
* May be used within your callback functions.
* @param hash_hint (Optional) A NULL terminated string of the file hash so that
* libclamav does not need to calculate it.
* @param[out] hash_out (Optional) A NULL terminated string of the file hash.
* The caller is responsible for freeing the string.
* @param hash_alg The hashing algorithm used for either `hash_hint` or `hash_out`.
* Supported algorithms are "md5", "sha1", "sha2-256".
* If not specified, the default is "sha2-256".
* @param file_type_hint (Optional) A NULL terminated string of the file type hint.
* E.g. "pe", "elf", "zip", etc.
* You may also use ClamAV type names such as "CL_TYPE_PE".
* ClamAV will ignore the hint if it is not familiar with the specified type.
* See also: https://docs.clamav.net/appendix/FileTypes.html#file-types
* @param[out] file_type_out (Optional) A NULL terminated string of the file type
* of the top layer as determined by ClamAV.
* Will take the form of the standard ClamAV file type format. E.g. "CL_TYPE_PE".
* See also: https://docs.clamav.net/appendix/FileTypes.html#file-types
* @return cl_error_t CL_CLEAN if no signature matched.
* CL_VIRUS if a signature matched.
* Another CL_E* error code if an error occurred.
*/
extern cl_error_t cl_scanfile_ex(
const char *filename,
const char **virname,
uint64_t *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions,
void *context,
const char *hash_hint,
char **hash_out,
const char *hash_alg,
const char *file_type_hint,
char **file_type_out);
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
/**
* @brief Scan custom data, given a fmap.
*
* @deprecated This function is deprecated and will be removed in a future release. Use `cl_scanmap_ex()` instead.
*
* Fmaps are an abstraction representing a file or memory buffer.
*
* Create one using:
* - `cl_fmap_open_handle()` for a file handle, or
* - `cl_fmap_open_memory()` for a memory buffer.
*
* After the scan, you can also get the file hash with `cl_fmap_get_file_hash()`.
*
* @param map Buffer to be scanned, in form of a cl_fmap_t.
* @param filename Name of data origin. Does not need to be an actual
* file on disk. May be NULL if a name is not available.
* @param[out] virname Pointer to receive the signature match name name if a
* signature matched.
* @param[out] scanned Number of bytes scanned / CL_COUNT_PRECISION.
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @param engine The scanning engine.
* @param scanoptions The scanning options struct.
* @param context An application-defined context struct, opaque to
* libclamav. May be used within your callback functions.
* @return cl_error_t CL_CLEAN if no signature matched. CL_VIRUS if a
* signature matched. Another CL_E* error code if an
* error occurred.
*/
extern cl_error_t cl_scanmap_callback(
cl_fmap_t *map,
const char *filename,
const char **virname,
unsigned long int *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions,
void *context);
/**
* @brief Scan custom data, given a map.
*
* Fmaps are an abstraction representing a file or memory buffer.
*
* Create one using:
* - `cl_fmap_open_handle()` for a file handle, or
* - `cl_fmap_open_memory()` for a memory buffer.
*
* After the scan, you can also get the file hash with `cl_fmap_get_file_hash()`.
*
* This extended version of cl_scanmap_callback allows the caller to provide
* additional hints to the scanning engine, such as a file hash and file type.
*
* This variant also upgrades the `scanned` output parameter to a 64-bit integer.
*
* @param map Buffer to be scanned, in form of a cl_fmap_t.
* @param filename Name of data origin. Does not need to be an actual
* file on disk. May be NULL if a name is not available.
* @param[out] virname Pointer to receive the signature match name name if a
* signature matched.
* @param[out] scanned The exact number of bytes scanned.
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @param engine The scanning engine.
* @param scanoptions The scanning options struct.
* @param[in,out] context (Optional) An application-defined context struct, opaque to libclamav.
* May be used within your callback functions.
* @param hash_hint (Optional) A NULL terminated string of the file hash so that
* libclamav does not need to calculate it.
* @param[out] hash_out (Optional) A NULL terminated string of the file hash.
* The caller is responsible for freeing the string.
* @param hash_alg The hashing algorithm used for either `hash_hint` or `hash_out`.
* Supported algorithms are "md5", "sha1", "sha2-256".
* If not specified, the default is "sha2-256".
* @param file_type_hint (Optional) A NULL terminated string of the file type hint.
* E.g. "pe", "elf", "zip", etc.
* You may also use ClamAV type names such as "CL_TYPE_PE".
* ClamAV will ignore the hint if it is not familiar with the specified type.
* See also: https://docs.clamav.net/appendix/FileTypes.html#file-types
* @param[out] file_type_out (Optional) A NULL terminated string of the file type
* of the top layer as determined by ClamAV.
* Will take the form of the standard ClamAV file type format. E.g. "CL_TYPE_PE".
* See also: https://docs.clamav.net/appendix/FileTypes.html#file-types
* @return cl_error_t CL_CLEAN if no signature matched.
* CL_VIRUS if a signature matched.
* Another CL_E* error code if an error occurred.
*/
extern cl_error_t cl_scanmap_ex(
cl_fmap_t *map,
const char *filename,
const char **virname,
uint64_t *scanned,
const struct cl_engine *engine,
struct cl_scan_options *scanoptions,
void *context,
const char *hash_hint,
char **hash_out,
const char *hash_alg,
const char *file_type_hint,
char **file_type_out);
/* ----------------------------------------------------------------------------
* Database handling.
*/
/**
* @brief Load the signature databases found at the path.
*
* @param path May be a file or directory.
* @param engine The engine to load the signatures into
* @param[out] signo The number of signatures loaded
* @param dboptions Database load bitflag field. See the CL_DB_* defines, above.
* @return cl_error_t
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_load(const char *path, struct cl_engine *engine, unsigned int *signo, unsigned int dboptions);
/**
* @brief Get the default database directory path.
*
* @return const char*
*/
extern const char *cl_retdbdir(void);
/* ----------------------------------------------------------------------------
* CVD / database functions
*/
2003-07-29 15:48:06 +00:00
struct cl_cvd { /* field no. */
char *time; /* 2 */
unsigned int version; /* 3 */
unsigned int sigs; /* 4 */
unsigned int fl; /* 5 */
/* padding */
char *md5; /* 6 */
char *dsig; /* 7 */
char *builder; /* 8 */
unsigned int stime; /* 9 */
2003-09-29 11:44:52 +00:00
};
/**
* @brief Read the CVD header data from a file.
*
* The returned pointer must be free'd with cl_cvdfree().
*
* @param file Filepath of CVD file.
* @return struct cl_cvd* Pointer to an allocated CVD header data structure.
*/
2003-09-29 11:44:52 +00:00
extern struct cl_cvd *cl_cvdhead(const char *file);
/**
* @brief Parse the CVD header.
*
* Buffer length is not an argument, and the check must be done
* by the caller cl_cvdhead().
*
* The returned pointer must be free'd with cl_cvdfree().
*
* @param head Pointer to the header data buffer.
* @return struct cl_cvd* Pointer to an allocated CVD header data structure.
*/
2003-10-26 06:01:03 +00:00
extern struct cl_cvd *cl_cvdparse(const char *head);
/**
* @brief Verify a CVD file by loading and unloading it.
*
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @deprecated This function is deprecated and will be removed in a future release. Use cl_cvdverify_ex() instead.
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
*
* @param file Filepath of CVD file.
* @return cl_error_t CL_SUCCESS if success, else a CL_E* error code.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_cvdverify(const char *file);
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
/**
* @brief Verify a CVD file by loading and unloading it.
*
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* May also verify the CVD digital signature.
*
* @param file Filepath of CVD file.
* @param certs_directory Directory containing CA certificates required to verify the CVD digital signature.
* @param dboptions Bitmask of flags to modify behavior.
* Set CL_DB_FIPS_LIMITS to require the CVD to be signed with a FIPS-compliant external '.sign' file.
* Set CL_DB_UNSIGNED to disable verification of CVD digital signatures. Allow load testing unsigned CVD files.
* @return cl_error_t CL_SUCCESS if success, else a CL_E* error code.
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
*/
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory, uint32_t dboptions);
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
/**
* @brief Free a CVD header struct.
*
* @param cvd Pointer to a CVD header struct.
*/
2003-09-29 11:44:52 +00:00
extern void cl_cvdfree(struct cl_cvd *cvd);
/**
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
* @brief Unpack a CVD file.
*
* @deprecated This function is deprecated and will be removed in a future release. Use cl_cvdunpack_ex() instead.
*
* Will verify the CVD is correctly signed unless the `dont_verify` parameter is true.
*
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
* This function is deprecated. Use cl_cvdunpack_ex() instead.
*
* @param file Filepath of CVD file.
* @param dir Destination directory.
* @param dont_verify If true, don't verify the CVD.
* @return cl_error_t CL_SUCCESS if success, else a CL_E* error code.
*/
extern cl_error_t cl_cvdunpack(const char *file, const char *dir, bool dont_verify);
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
/**
* @brief Unpack a CVD file.
*
* Will verify the CVD is correctly signed unless the `dont_verify` parameter is true.
*
* @param file Filepath of CVD file.
* @param dir Destination directory.
* @param certs_directory Path where ClamAV public certs are located, needed to verify external digital signatures.
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @param dboptions Bitmask of flags to modify behavior.
* Set CL_DB_FIPS_LIMITS to require the CVD to be signed with a FIPS-compliant external '.sign' file.
* Set CL_DB_UNSIGNED to disable verification of CVD digital signatures. Allow load testing unsigned CVD files.
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
* @return cl_error_t CL_SUCCESS if success, else a CL_E* error code.
*/
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, const char *certs_directory, uint32_t dboptions);
FIPS-compliant CVD signing and verification Add X509 certificate chain based signing with PKCS7-PEM external signatures distributed alongside CVD's in a custom .cvd.sign format. This new signing and verification mechanism is primarily in support of FIPS compliance. Fixes: https://github.com/Cisco-Talos/clamav/issues/564 Add a Rust implementation for parsing, verifying, and unpacking CVD files. Now installs a 'certs' directory in the app config directory (e.g. <prefix>/etc/certs). The install location is configurable. The CMake option to configure the CVD certs directory is: `-D CVD_CERTS_DIRECTORY=PATH` New options to set an alternative CVD certs directory: - Commandline for freshclam, clamd, clamscan, and sigtool is: `--cvdcertsdir PATH` - Env variable for freshclam, clamd, clamscan, and sigtool is: `CVD_CERTS_DIR` - Config option for freshclam and clamd is: `CVDCertsDirectory PATH` Sigtool: - Add sign/verify commands. - Also verify CDIFF external digital signatures when applying CDIFFs. - Place commonly used commands at the top of --help string. - Fix up manpage. Freshclam: - Will try to download .sign files to verify CVDs and CDIFFs. - Fix an issue where making a CLD would only include the CFG file for daily and not if patching any other database. libclamav.so: - Bump version to 13:0:1 (aka 12.1.0). - Also remove libclamav.map versioning. Resolves: https://github.com/Cisco-Talos/clamav/issues/1304 - Add two new API's to the public clamav.h header: ```c extern cl_error_t cl_cvdverify_ex(const char *file, const char *certs_directory); extern cl_error_t cl_cvdunpack_ex(const char *file, const char *dir, bool dont_verify, const char *certs_directory); ``` The original `cl_cvdverify` and `cl_cvdunpack` are deprecated. - Add `cl_engine_field` enum option `CL_ENGINE_CVDCERTSDIR`. You may set this option with `cl_engine_set_str` and get it with `cl_engine_get_str`, to override the compiled in default CVD certs directory. libfreshclam.so: Bump version to 4:0:0 (aka 4.0.0). Add sigtool sign/verify tests and test certs. Make it so downloadFile doesn't throw a warning if the server doesn't have the .sign file. Replace use of md5-based FP signatures in the unit tests with sha256-based FP signatures because the md5 implementation used by Python may be disabled in FIPS mode. Fixes: https://github.com/Cisco-Talos/clamav/issues/1411 CMake: Add logic to enable the Rust openssl-sys / openssl-rs crates to build against the same OpenSSL library as is used for the C build. The Rust unit test application must also link directly with libcrypto and libssl. Fix some log messages with missing new lines. Fix missing environment variable notes in --help messages and manpages. Deconflict CONFDIR/DATADIR/CERTSDIR variable names that are defined in clamav-config.h.in for libclamav from variable that had the same name for use in clamav applications that use the optparser. The 'clamav-test' certs for the unit tests will live for 10 years. The 'clamav-beta.crt' public cert will only live for 120 days and will be replaced before the stable release with a production 'clamav.crt'.
2024-11-21 14:01:09 -05:00
/**
* @brief Retrieve the age of CVD disk data.
*
* Will retrieve the age of the youngest file in a database directory,
* or the age of a single CVD (or CLD) file.
*
* @param path Filepath of CVD directory or file.
* @param age_seconds Age of the directory or file.
* @return cl_error_t CL_SUCCESS if success, else a CL_E* error code.
*/
extern cl_error_t cl_cvdgetage(const char *path, time_t *age_seconds);
/* ----------------------------------------------------------------------------
* DB directory stat functions.
* Use these functions to watch for database changes.
*/
struct cl_stat {
char *dir;
STATBUF *stattab;
char **statdname;
unsigned int entries;
};
/**
* @brief Initialize a directory to be watched for database changes.
*
* The dbstat out variable is allocated and must be freed using cl_statfree().
*
* @param dirname Pathname of the database directory.
* @param[out] dbstat dbstat handle.
* @return cl_error_t CL_SUCCESS if successfully initialized.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_statinidir(const char *dirname, struct cl_stat *dbstat);
/**
* @brief Check the database directory for changes.
*
* @param dbstat dbstat handle.
* @return int 0 No change.
2023-11-26 15:01:19 -08:00
* @return int 1 Some change occurred.
*/
2003-07-29 15:48:06 +00:00
extern int cl_statchkdir(const struct cl_stat *dbstat);
/**
* @brief Free the dbstat handle.
*
* @param dbstat dbstat handle.
* @return cl_error_t CL_SUCCESS
* @return cl_error_t CL_ENULLARG
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_statfree(struct cl_stat *dbstat);
2003-07-29 15:48:06 +00:00
/**
* @brief Count the number of signatures in a database file or directory.
*
* @param path Path of the database file or directory.
* @param countoptions A bitflag field. May be CL_COUNTSIGS_OFFICIAL, CL_COUNTSIGS_UNOFFICIAL, or CL_COUNTSIGS_ALL.
* @param[out] sigs The number of sigs.
* @return cl_error_t CL_SUCCESS if success, else a CL_E* error type.
*/
bb12506: Fix phishing/heuristic alert verbosity Some detections, like phishing, are considered heuristic alerts because they match based on behavior more than on content. A subset of these are considered "potentially unwanted" (low-severity). These low-severity alerts include: - phishing - PDFs with obfuscated object names - bytecode signature alerts that start with "BC.Heuristics" The concept is that unless you enable "heuristic precedence" (a method of lowing the threshold to immediateley alert on low-severity detections), the scan should continue after a match in case a higher severity match is found. Only at the end will it print the low-severity match if nothing else was found. The current implementation is buggy though. Scanning of archives does not correctly bail out for the entire archive if one email contains a phishing link. Instead, it sets the "heuristic found" flag then and alerts for every subsequent file in the archive because it doesn't know if the heuristic was found in an embedded file or the target file. Because it's just a heuristic and the status is "clean", it keeps scanning. This patch corrects the behavior by checking if a low-severity alerts were found at the end of scanning the target file, instead of at the end of each embedded file. Additionally, this patch fixes an in issue with phishing alerts wherein heuristic precedence mode did not cause a scan to stop after the first alert. The above changes required restructuring to create an fmap inside of cl_scandesc_callback() so that scan_common() could be modified to require an fmap and set up so that the current *ctx->fmap pointer is never NULL when scan_common() evaluates match results. Also fixed a couple minor bugs in the phishing unit tests and cleaned up the test code for improved legitibility and type safety.
2020-04-18 10:46:57 -04:00
extern cl_error_t cl_countsigs(const char *path, unsigned int countoptions, unsigned int *sigs);
/* ----------------------------------------------------------------------------
* Software versions.
*/
2003-07-29 15:48:06 +00:00
/**
* @brief Get the Functionality Level (FLEVEL).
*
* @return unsigned int The FLEVEL.
*/
extern unsigned int cl_retflevel(void);
/**
* @brief Get the ClamAV version string.
*
* E.g. clamav-0.100.0-beta
*
* @return const char* The version string.
*/
extern const char *cl_retver(void);
2003-07-29 15:48:06 +00:00
/* ----------------------------------------------------------------------------
* Others.
*/
extern const char *cl_strerror(cl_error_t clerror);
2003-07-29 15:48:06 +00:00
/* ----------------------------------------------------------------------------
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* Hashing functions.
*/
#define MD5_HASH_SIZE 16
2014-07-01 19:38:01 -04:00
#define SHA1_HASH_SIZE 20
#define SHA256_HASH_SIZE 32
#define SHA384_HASH_SIZE 48
#define SHA512_HASH_SIZE 64
2014-07-01 19:38:01 -04:00
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
#define CL_HASH_FLAG_NONE 0x00 /* None */
#define CL_HASH_FLAG_ALLOCATE 0x01 /* Use CL_HASH_FLAG_ALLOCATE to dynamically allocate the output buffer. \
* If this flag is set, the function will allocate a buffer for the hash and return it. \
* The caller is responsible for freeing the buffer using free(). \
* If this flag is not set, the caller must provide a buffer to store the hash. */
2025-07-01 23:39:58 -04:00
#define CL_HASH_FLAG_FIPS_BYPASS 0x02 /* Use CL_HASH_FLAG_FIPS_BYPASS to bypass FIPS restrictions on which algorithms can be \
* used. This is useful if you want to use algorithms that are not FIPS-approved. \
* For example, you might want to use this flag if you want to use "md5" or "sha1". \
* You should only do this when the hash is used for non-cryptographic purposes. \
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* Note: If OpenSSL's FIPS provider is not available, this flag has no effect. */
/**
* @brief Generate a hash of data.
*
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @param alg The hashing algorithm to use.
* Suggested "alg" names include "md5", "sha1", "sha2-256", "sha2-384", and "sha2-512".
* But the underlying hashing library is OpenSSL and you might be able to use
* other algorithms supported by OpenSSL's EVP_get_digestbyname() function.
* Note: For the `cl_scan*` functions (above) the supported algorithms are
* presently limited to "md5", "sha1", "sha2-256".
* @param data The data to be hashed.
* @param data_len The length of the to-be-hashed data.
* @param[inout] hash A buffer to store the generated hash.
* Set flags to CL_HASH_FLAG_ALLOCATE to dynamically allocate buffer.
* @param[inout] hash_len If providing a buffer, set this to the size of the buffer.
* If allocating, this is purely an output parameter and need not be initialized.
* @param flags Flags to modify the behavior of the hashing function.
* Use CL_HASH_FLAG_ALLOCATE to dynamically allocate the output buffer.
* Use CL_HASH_FLAG_FIPS_BYPASS to bypass FIPS restrictions on which algorithms can be used.
*
* @return cl_error_t CL_SUCCESS if the hash was generated successfully.
* CL_E* error code if an error occurred.
*/
extern cl_error_t cl_hash_data_ex(
const char *alg,
const uint8_t *data,
size_t data_len,
uint8_t **hash,
size_t *hash_len,
uint32_t flags);
struct cl_hash_ctx;
typedef struct cl_hash_ctx cl_hash_ctx_t;
/**
* @brief Initialize a hash context.
*
* @param alg The hash algorithm to use.
* @param flags Flags to modify the behavior of the hashing function.
* Use CL_HASH_FLAG_FIPS_BYPASS to bypass FIPS restrictions on which algorithms can be used.
* @return cl_error_t CL_SUCCESS if the hash context was successfully initialized.
*/
extern cl_error_t cl_hash_init_ex(
const char *alg,
uint32_t flags,
cl_hash_ctx_t **ctx_out);
/**
* @brief Update a hash context with new data.
*
* @param ctx The hash context.
* @param data The data to hash.
* @param length The size of the data.
* @return cl_error_t CL_SUCCESS if the data was successfully added to the hash context.
* CL_E* error code if an error occurred.
*/
extern cl_error_t cl_update_hash_ex(
cl_hash_ctx_t *ctx,
const uint8_t *data,
size_t length);
/**
* @brief Finalize a hash context and get the resulting hash.
*
* @param ctx The hash context.
* @param[inout] hash A buffer to store the generated hash.
* Set flags to CL_HASH_FLAG_ALLOCATE to dynamically allocate buffer.
* @param[inout] hash_len If providing a buffer, set this to the size of the buffer.
* If allocating, this is purely an output parameter and need not be initialized.
* @param flags Flags to modify the behavior of the hashing function.
* Use CL_HASH_FLAG_ALLOCATE to dynamically allocate the output buffer.
*
* @return cl_error_t CL_SUCCESS if the hash was successfully finalized.
* CL_E* error code if an error occurred.
*/
extern cl_error_t cl_finish_hash_ex(
cl_hash_ctx_t *ctx,
uint8_t **hash,
size_t *hash_len,
uint32_t flags);
/**
* @brief Destroy a hash context.
*
* @param ctx The hash context.
*/
extern void cl_hash_destroy_ex(cl_hash_ctx_t *ctx);
/**
* @brief Generate a hash of a file.
*
* @param alg The hashing algorithm to use.
* @param fd The file descriptor.
* @param offset The offset in the file to start hashing from.
* @param length The length of the data to hash. If 0, the entire file will be hashed.
* @param[inout] hash A buffer to store the generated hash.
* Set flags to CL_HASH_FLAG_ALLOCATE to dynamically allocate buffer.
* @param[inout] hash_len A pointer that stores how long the generated hash is.
* @param flags Flags to modify the behavior of the hashing function.
* Use CL_HASH_FLAG_ALLOCATE to dynamically allocate the output buffer.
* Use CL_HASH_FLAG_FIPS_BYPASS to bypass FIPS restrictions on which algorithms can be used.
*
* @return cl_error_t CL_SUCCESS if the hash was generated successfully.
*/
extern cl_error_t cl_hash_file_fd_ex(
const char *alg,
int fd,
size_t offset,
size_t length,
uint8_t **hash,
size_t *hash_len,
uint32_t flags);
/**
* @brief Generate a hash of data.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_data_ex()` instead.
*
* Note: This function intentionally bypasses FIPS restrictions on which algorithms can be used.
* Do not use this function for cryptographic purposes or for false positive hash checks.
*
* @param alg The hashing algorithm to use.
* @param buf The data to be hashed.
* @param len The length of the to-be-hashed data.
* @param[out] obuf (optional) A buffer to store the generated hash. Use NULL to dynamically allocate buffer.
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* If providing, use the *_HASH_SIZE macros above to determine the required buffer size.
* @param[out] olen (optional) A pointer that stores how long the generated hash is.
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* This is purely an output parameter and need not be initialized.
* @return A pointer to the generated hash or obuf if obuf is not NULL.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_hash_data(const char *alg, const void *buf, size_t len, unsigned char *obuf, unsigned int *olen);
2014-07-01 19:38:01 -04:00
/**
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @brief Generate a hash of a file given a file descriptor.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Note: there is no `cl_hash_file_fd_ctx_ex()` function. Use `cl_hash_file_fd_ex()` instead.
*
* @param ctx A pointer to the OpenSSL EVP_MD_CTX object.
* @param fd The file descriptor.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_hash_file_fd_ctx(EVP_MD_CTX *ctx, int fd, unsigned int *olen);
2014-07-01 19:38:01 -04:00
/**
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @brief Generate a hash of a file given a file descriptor.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_file_fd_ex()` instead.
*
* Note: This function intentionally bypasses FIPS restrictions on which algorithms can be used.
* Do not use this function for cryptographic purposes or for false positive hash checks.
*
* @param fd The file descriptor.
* @param alg The hashing algorithm to use.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_hash_file_fd(int fd, const char *alg, unsigned int *olen);
2014-07-01 19:38:01 -04:00
/**
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @brief Generate a hash of a file given a FILE pointer.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Note: There is no `cl_hash_file_fp_ex()` function. Use `cl_hash_file_fd_ex()` instead.
*
* Note: This function intentionally bypasses FIPS restrictions on which algorithms can be used.
* Do not use this function for cryptographic purposes or for false positive hash checks.
*
* @param fp A pointer to a FILE object.
* @param alg The hashing algorithm to use.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_hash_file_fp(FILE *fp, const char *alg, unsigned int *olen);
2014-07-01 19:38:01 -04:00
/**
Swap clean cache from MD5 to SHA2-256 Change the clean-cache to use SHA2-256 instead of MD5. Note that all references are changed to specify "SHA2-256" now instead of "SHA256", for clarity. But there is no plan to add support for SHA3 algorithms at this time. Significant code cleanup. E.g.: - Implemented goto-done error handling. - Used `uint8_t *` instead of `unsigned char *`. - Use `bool` for boolean checks, rather than `int. - Used `#defines` instead of magic numbers. - Removed duplicate `#defines` for things like hash length. Add new option to calculate and record additional hash types when the "generate metadata JSON" feature is enabled: - libclamav option: `CL_SCAN_GENERAL_STORE_EXTRA_HASHES` - clamscan option: `--json-store-extra-hashes` (default off) - clamd.conf option: `JsonStoreExtraHashes` (default 'no') Renamed the sigtool option `--sha256` to `--sha2-256`. The original option is still functional, but is deprecated. For the "generate metadata JSON" feature, the file hash is now stored as "sha2-256" instead of "FileMD5". If you enable the "extra hashes" option, then it will also record "md5" and "sha1". Deprecate and disable the internal "SHA collect" feature. This option had been hidden behind C #ifdef checks for an option that wasn't exposed through CMake, so it was basically unavailable anyways. Changes to calculate file hashes when they're needed and no sooner. For the FP feature in the matcher module, I have mimiced the optimization in the FMAP scan routine which makes it so that it can calculate multiple hashes in a single pass of the file. The `HandlerType` feature stores a hash of the file in the scan ctx to prevent retyping the exact same data more than once. I removed that hash field and replaced it with an attribute flag that is applied to the new recursion stack layer when retyping a file. This also closes a minor bug that would prevent retyping a file with an all-zero hash. :) The work upgrading cache.c to support SHA2-256 sized hashes thanks to: https://github.com/m-sola CLAM-255 CLAM-1858 CLAM-1859 CLAM-1860
2025-06-03 19:03:20 -04:00
* @brief Generate a sha2-256 hash of data.
*
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_data_ex()` instead.
*
* @param buf The data to hash.
* @param len The length of the to-be-hashed data.
* @param[out] obuf (optional) A pointer to store the generated hash. Use NULL to dynamically allocate buffer.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sha256(const void *buf, size_t len, unsigned char *obuf, unsigned int *olen);
2014-07-01 19:38:01 -04:00
/**
Swap clean cache from MD5 to SHA2-256 Change the clean-cache to use SHA2-256 instead of MD5. Note that all references are changed to specify "SHA2-256" now instead of "SHA256", for clarity. But there is no plan to add support for SHA3 algorithms at this time. Significant code cleanup. E.g.: - Implemented goto-done error handling. - Used `uint8_t *` instead of `unsigned char *`. - Use `bool` for boolean checks, rather than `int. - Used `#defines` instead of magic numbers. - Removed duplicate `#defines` for things like hash length. Add new option to calculate and record additional hash types when the "generate metadata JSON" feature is enabled: - libclamav option: `CL_SCAN_GENERAL_STORE_EXTRA_HASHES` - clamscan option: `--json-store-extra-hashes` (default off) - clamd.conf option: `JsonStoreExtraHashes` (default 'no') Renamed the sigtool option `--sha256` to `--sha2-256`. The original option is still functional, but is deprecated. For the "generate metadata JSON" feature, the file hash is now stored as "sha2-256" instead of "FileMD5". If you enable the "extra hashes" option, then it will also record "md5" and "sha1". Deprecate and disable the internal "SHA collect" feature. This option had been hidden behind C #ifdef checks for an option that wasn't exposed through CMake, so it was basically unavailable anyways. Changes to calculate file hashes when they're needed and no sooner. For the FP feature in the matcher module, I have mimiced the optimization in the FMAP scan routine which makes it so that it can calculate multiple hashes in a single pass of the file. The `HandlerType` feature stores a hash of the file in the scan ctx to prevent retyping the exact same data more than once. I removed that hash field and replaced it with an attribute flag that is applied to the new recursion stack layer when retyping a file. This also closes a minor bug that would prevent retyping a file with an all-zero hash. :) The work upgrading cache.c to support SHA2-256 sized hashes thanks to: https://github.com/m-sola CLAM-255 CLAM-1858 CLAM-1859 CLAM-1860
2025-06-03 19:03:20 -04:00
* @brief Generate a sha2-384 hash of data.
*
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_data_ex()` instead.
*
* @param buf The data to hash.
* @param len The length of the to-be-hashed data.
* @param[out] obuf (optional) A pointer to store the generated hash. Use NULL to dynamically allocate buffer.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sha384(const void *buf, size_t len, unsigned char *obuf, unsigned int *olen);
/**
Swap clean cache from MD5 to SHA2-256 Change the clean-cache to use SHA2-256 instead of MD5. Note that all references are changed to specify "SHA2-256" now instead of "SHA256", for clarity. But there is no plan to add support for SHA3 algorithms at this time. Significant code cleanup. E.g.: - Implemented goto-done error handling. - Used `uint8_t *` instead of `unsigned char *`. - Use `bool` for boolean checks, rather than `int. - Used `#defines` instead of magic numbers. - Removed duplicate `#defines` for things like hash length. Add new option to calculate and record additional hash types when the "generate metadata JSON" feature is enabled: - libclamav option: `CL_SCAN_GENERAL_STORE_EXTRA_HASHES` - clamscan option: `--json-store-extra-hashes` (default off) - clamd.conf option: `JsonStoreExtraHashes` (default 'no') Renamed the sigtool option `--sha256` to `--sha2-256`. The original option is still functional, but is deprecated. For the "generate metadata JSON" feature, the file hash is now stored as "sha2-256" instead of "FileMD5". If you enable the "extra hashes" option, then it will also record "md5" and "sha1". Deprecate and disable the internal "SHA collect" feature. This option had been hidden behind C #ifdef checks for an option that wasn't exposed through CMake, so it was basically unavailable anyways. Changes to calculate file hashes when they're needed and no sooner. For the FP feature in the matcher module, I have mimiced the optimization in the FMAP scan routine which makes it so that it can calculate multiple hashes in a single pass of the file. The `HandlerType` feature stores a hash of the file in the scan ctx to prevent retyping the exact same data more than once. I removed that hash field and replaced it with an attribute flag that is applied to the new recursion stack layer when retyping a file. This also closes a minor bug that would prevent retyping a file with an all-zero hash. :) The work upgrading cache.c to support SHA2-256 sized hashes thanks to: https://github.com/m-sola CLAM-255 CLAM-1858 CLAM-1859 CLAM-1860
2025-06-03 19:03:20 -04:00
* @brief Generate a sha2-512 hash of data.
*
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_data_ex()` instead.
*
* @param buf The data to hash.
* @param len The length of the to-be-hashed data.
* @param[out] obuf (optional) A pointer to store the generated hash. Use NULL to dynamically allocate buffer.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sha512(const void *buf, size_t len, unsigned char *obuf, unsigned int *olen);
/**
* @brief Generate a sha1 hash of data.
*
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_data_ex()` instead.
*
* Note: This function intentionally bypasses FIPS restrictions on which algorithms can be used.
* Do not use this function for cryptographic purposes or for false positive hash checks.
*
* @param buf The data to hash.
* @param len The length of the to-be-hashed data.
* @param[out] obuf (optional) A pointer to store the generated hash. Use NULL to dynamically allocate buffer.
* @param[out] olen (optional) The length of the generated hash.
* @return A pointer to a malloc'd buffer that holds the generated hash.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sha1(const void *buf, size_t len, unsigned char *obuf, unsigned int *olen);
2014-07-01 19:38:01 -04:00
FIPS & FIPS-like limits on hash algs for cryptographic uses ClamAV will not function when using a FIPS-enabled OpenSSL 3.x. This is because ClamAV uses MD5 and SHA1 algorithms for a variety of purposes including matching for malware detection, matching to prevent false positives on known-clean files, and for verification of MD5-based RSA digital signatures for determining CVD (signature database archive) authenticity. Interestingly, FIPS had been intentionally bypassed when creating hashes based whole buffers and whole files (by descriptor or `FILE`-pointer): https://github.com/Cisco-Talos/clamav/commit/78d4a9985a06a418dd1338c94ee5db461035d75b Note: this bypassed FIPS the 1.x way with: `EVP_MD_CTX_set_flags(ctx, EVP_MD_CTX_FLAG_NON_FIPS_ALLOW);` It was NOT disabled when using `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. That likely worked by coincidence in that the hash was already calculated most of the time. It certainly would have made use of those functions if the hash had not been calculated prior: https://github.com/Cisco-Talos/clamav/blob/78d4a9985a06a418dd1338c94ee5db461035d75b/libclamav/matcher.c#L743 Regardless, bypassing FIPS entirely is not the correct solution. The FIPS restrictions against using MD5 and SHA1 are valid, particularly when verifying CVD digital siganatures, but also I think when using a hash to determine if the file is known-clean (i.e. the "clean cache" and also MD5-based and SHA1-based FP signatures). This commit extends the work to bypass FIPS using the newer 3.x method: `md = EVP_MD_fetch(NULL, alg, "-fips");` It does this for the legacy `cl_hash*()` functions including `cl_hash_init()` / `cl_update_hash()` / `cl_finish_hash()`. It also introduces extended versions that allow the caller to choose if they want to bypass FIPS: - `cl_hash_data_ex()` - `cl_hash_init_ex()` - `cl_update_hash_ex()` - `cl_finish_hash_ex()` - `cl_hash_destroy_ex()` - `cl_hash_file_fd_ex()` See the `flags` parameter for each. Ironically, this commit does NOT use the new functions at this time. The rational is that ClamAV may need MD5, SHA1, and SHA-256 hashes of the same files both for determining if the file is malware, and for determining if the file is clean. So instead, this commit will do a checks when: 1. Creating a new ClamAV scanning engine. If FIPS-mode enabled, it will automatically toggle the "FIPS limits" engine option. When loading signatures, if the engine "FIPS limits" option is enabled, then MD5 and SHA1 FP signatures will be skipped. 2. Before verifying a CVD (e.g. also for loading, unpacking when verification enabled). If "FIPS limits" or FIPS-mode are enabled, then the legacy MD5-based RSA method is disabled. Note: This commit also refactors the interface for `cl_cvdverify_ex()` and `cl_cvdunpack_ex()` so they take a `flags` parameters, rather than a single `bool`. As these functions are new in this version, it does not break the ABI. The cache was already switched to use SHA2-256, so that's not a concern for checking FIPS-mode / FIPS limits options. This adds an option for `freshclam.conf` and `clamd.conf`: FIPSCryptoHashLimits yes And an equivalent command-line option for `clamscan` and `sigtool`: --fips-limits You may programmatically enable FIPS-limits for a ClamAV engine like this: ```C cl_engine_set_num(engine, CL_ENGINE_FIPS_LIMITS, 1); ``` CLAM-2792
2025-07-01 20:41:47 -04:00
/**
* @brief Initialize a hash context.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_init_ex()` instead.
*
* Note: This function intentionally bypasses FIPS restrictions on which algorithms can be used.
* Do not use this function for cryptographic purposes or for false positive hash checks.
*
* @param alg The hash algorithm to use.
* @return void*
*/
extern void *cl_hash_init(const char *alg);
/**
* @brief Update a hash context with new data.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_update_hash_ex()` instead.
*
* @param ctx The hash context.
* @param data The data to hash.
* @param sz The size of the data.
* @return int 0 on success, -1 on error.
*/
extern int cl_update_hash(void *ctx, const void *data, size_t sz);
/**
* @brief Finalize a hash context and get the resulting hash.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_finish_hash_ex()` instead.
*
* @param ctx The hash context.
* @param buf A buffer to store the resulting hash. Must be large enough to hold the hash.
* @return int 0 on success, -1 on error.
*/
extern int cl_finish_hash(void *ctx, void *buf);
/**
* @brief Destroy a hash context.
*
* @deprecated This function is deprecated and will be removed in a future release.
* Use `cl_hash_destroy_ex()` instead.
*
* @param ctx The hash context.
*/
extern void cl_hash_destroy(void *ctx);
/* -------------------------------------------------------------------------------
* Signing and verification functions.
*/
/**
* @brief Verify validity of signed data.
*
* @param pkey The public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param data The data that was signed.
* @param datalen The length of the data.
* @param decode Whether or not to base64-decode the signature prior to verification. 1 for yes, 0 for no.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature(
EVP_PKEY *pkey,
const char *alg,
unsigned char *sig,
unsigned int siglen,
unsigned char *data,
size_t datalen,
int decode);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param pkey The public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param digest The hash of the signed data.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_hash(
EVP_PKEY *pkey,
const char *alg,
unsigned char *sig,
unsigned int siglen,
unsigned char *digest);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param pkey The public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param fd The file descriptor.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_fd(
EVP_PKEY *pkey,
const char *alg,
unsigned char *sig,
unsigned int siglen,
int fd);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param x509path The path to the public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param digest The hash of the signed data.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_hash_x509_keyfile(
char *x509path,
const char *alg,
unsigned char *sig,
unsigned int siglen,
unsigned char *digest);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param x509path The path to the public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param fd The file descriptor.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_fd_x509_keyfile(
char *x509path,
const char *alg,
unsigned char *sig,
unsigned int siglen,
int fd);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param x509path The path to the public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param data The data that was signed.
* @param datalen The length of the data.
* @param decode Whether or not to base64-decode the signature prior to verification. 1 for yes, 0 for no.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_x509_keyfile(
char *x509path,
const char *alg,
unsigned char *sig,
unsigned int siglen,
unsigned char *data,
size_t datalen,
int decode);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data
*
* @param x509 The X509 object of the public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param digest The hash of the signed data.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_hash_x509(
X509 *x509,
const char *alg,
unsigned char *sig,
unsigned int siglen,
unsigned char *digest);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param x509 The X509 object of the public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param fd The file descriptor.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_fd_x509(X509 *x509, const char *alg, unsigned char *sig, unsigned int siglen, int fd);
2014-07-01 19:38:01 -04:00
/**
* @brief Verify validity of signed data.
*
* @param x509 The X509 object of the public key of the keypair that signed the data.
* @param alg The algorithm used to hash the data.
* @param sig The signature block.
* @param siglen The length of the signature.
* @param data The data that was signed.
* @param datalen The length of the data.
* @param decode Whether or not to base64-decode the signature prior to verification. 1 for yes, 0 for no.
* @return 0 for success, -1 for error or invalid signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_verify_signature_x509(
X509 *x509,
const char *alg,
unsigned char *sig,
unsigned int siglen,
unsigned char *data,
size_t datalen,
int decode);
2014-07-01 19:38:01 -04:00
/**
* @brief Get an X509 object from memory.
*
* @param data A pointer to a spot in memory that contains the PEM X509 cert.
* @param len The length of the data.
* @return A pointer to the X509 object on success, NULL on error.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern X509 *cl_get_x509_from_mem(void *data, unsigned int len);
2014-07-01 19:38:01 -04:00
/**
* @brief Validate an X509 certificate chain, with the chain being located in a directory.
*
* @param tsdir The path to the trust store directory.
* @param certpath The path to the X509 certificate to be validated.
* @return 0 for success, -1 for error or invalid certificate.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_validate_certificate_chain_ts_dir(char *tsdir, char *certpath);
2014-07-01 19:38:01 -04:00
/**
* @brief Validate an X509 certificate chain with support for a CRL.
*
* @param authorities A NULL-terminated array of strings that hold the path of the CA's X509 certificate.
* @param crlpath (optional) A path to the CRL file. NULL if no CRL.
* @param certpath The path to the X509 certificate to be validated.
* @return 0 for success, -1 for error or invalid certificate.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern int cl_validate_certificate_chain(char **authorities, char *crlpath, char *certpath);
2014-07-01 19:38:01 -04:00
/**
* @brief Load an X509 certificate from a file.
*
* @param certpath The path to the X509 certificate.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern X509 *cl_load_cert(const char *certpath);
2014-07-01 19:38:01 -04:00
/**
* @brief Parse an ASN1_TIME object.
*
* @param timeobj The ASN1_TIME object.
* @return A pointer to a (struct tm). Adjusted for time zone and daylight savings time.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern struct tm *cl_ASN1_GetTimeT(ASN1_TIME *timeobj);
2014-07-01 19:38:01 -04:00
/**
* @brief Load a CRL file into an X509_CRL object.
*
* @param file The path to the CRL.
* @return A pointer to an X509_CRL object or NULL on error.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern X509_CRL *cl_load_crl(const char *timeobj);
2014-07-01 19:38:01 -04:00
/**
* @brief Sign data with a key stored on disk.
*
* @param keypath The path to the RSA private key.
* @param alg The hash/signature algorithm to use.
* @param hash The hash to sign.
* @param[out] olen A pointer that stores the size of the signature.
* @param Whether or not to base64-encode the signature. 1 for yes, 0 for no.
* @return The generated signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sign_data_keyfile(
char *keypath,
const char *alg,
unsigned char *hash,
unsigned int *olen,
int encode);
2014-07-01 19:38:01 -04:00
/**
* @brief Sign data with an RSA private key object.
*
* @param pkey The RSA private key object.
* @param alg The hash/signature algorithm to use.
* @param hash The hash to sign.
* @param[out] olen A pointer that stores the size of the signature.
* @param Whether or not to base64-encode the signature. 1 for yes, 0 for no.
* @return The generated signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sign_data(EVP_PKEY *pkey, const char *alg, unsigned char *hash, unsigned int *olen, int encode);
2014-07-01 19:38:01 -04:00
/**
* @brief Sign a file with an RSA private key object.
*
* @param fd The file descriptor.
* @param pkey The RSA private key object.
* @param alg The hash/signature algorithm to use.
* @param[out] olen A pointer that stores the size of the signature.
* @param encode Whether or not to base64-encode the signature. 1 for yes, 0 for no.
* @return The generated signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sign_file_fd(int fd, EVP_PKEY *pkey, const char *alg, unsigned int *olen, int encode);
2014-07-01 19:38:01 -04:00
/**
* @brief Sign a file with an RSA private key object.
*
* @param fp A pointer to a FILE object.
* @param pkey The RSA private key object.
* @param alg The hash/signature algorithm to use.
* @param[out] olen A pointer that stores the size of the signature.
* @param encode Whether or not to base64-encode the signature. 1 for yes, 0 for no.
* @return The generated signature.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern unsigned char *cl_sign_file_fp(FILE *fp, EVP_PKEY *pkey, const char *alg, unsigned int *olen, int encode);
2014-07-01 19:38:01 -04:00
/**
* @brief Get the Private Key stored on disk.
*
* @param keypath The path on disk where the private key is stored.
* @return A pointer to the EVP_PKEY object that contains the private key in memory.
2014-07-01 19:38:01 -04:00
*/
libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decerypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commits introduced a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
extern EVP_PKEY *cl_get_pkey_file(char *keypath);
2014-07-01 19:38:01 -04:00
2003-07-29 15:48:06 +00:00
#ifdef __cplusplus
}
2003-07-29 15:48:06 +00:00
#endif
#endif /* __CLAMAV_H */