clamav/libclamav/scan_layer.c

233 lines
6.2 KiB
C
Raw Permalink Normal View History

libclamav: scan-layer callback API functions Add the following scan callbacks: ```c cl_engine_set_scan_callback(engine, &pre_hash_callback, CL_SCAN_CALLBACK_PRE_HASH); cl_engine_set_scan_callback(engine, &pre_scan_callback, CL_SCAN_CALLBACK_PRE_SCAN); cl_engine_set_scan_callback(engine, &post_scan_callback, CL_SCAN_CALLBACK_POST_SCAN); cl_engine_set_scan_callback(engine, &alert_callback, CL_SCAN_CALLBACK_ALERT); cl_engine_set_scan_callback(engine, &file_type_callback, CL_SCAN_CALLBACK_FILE_TYPE); ``` Each callback may alter scan behavior using the following return codes: * CL_BREAK Scan aborted by callback (the rest of the scan is skipped). This does not mark the file as clean or infected, it just skips the rest of the scan. * CL_SUCCESS / CL_CLEAN File scan will continue. This is different than CL_VERIFIED because it does not affect prior or future alerts. Return CL_VERIFIED instead if you want to remove prior alerts for this layer and skip the rest of the scan for this layer. * CL_VIRUS This means you don't trust the file. A new alert will be added. For CL_SCAN_CALLBACK_ALERT: Means you agree with the alert (no extra alert needed). * CL_VERIFIED Layer explicitly trusted by the callback and previous alerts removed FOR THIS layer. You might want to do this if you trust the hash or verified a digital signature. The rest of the scan will be skipped FOR THIS layer. For contained files, this does NOT mean that the parent or adjacent layers are trusted. 
Each callback is given a pointer to the current scan layer from which they can get previous layers, can get the layer's fmap, and then various attributes of the layer and of the fmap such as: - layer recursion level - layer object id - layer file type - layer attributes (was decrypted, normalized, embedded, or re-typed) - layer last alert - fmap name - fmap hash (md5, sha1, or sha2-256) - fmap data (pointer and size) - fmap file descriptor, if any (fd, offset, size) - fmap filepath, if any (filepath, offset, size) To make this possible, this commit introduces a handful of new APIs to query scan-layer details and fmap details: - `cl_error_t cl_fmap_set_name(cl_fmap_t *map, const char *name);` - `cl_error_t cl_fmap_get_name(cl_fmap_t *map, const char **name_out);` - `cl_error_t cl_fmap_set_path(cl_fmap_t *map, const char *path);` - `cl_error_t cl_fmap_get_path(cl_fmap_t *map, const char **path_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_fd(const cl_fmap_t *map, int *fd_out, size_t *offset_out, size_t *len_out);` - `cl_error_t cl_fmap_get_size(const cl_fmap_t *map, size_t *size_out);` - `cl_error_t cl_fmap_set_hash(const cl_fmap_t *map, const char *hash_alg, char hash);` - `cl_error_t cl_fmap_have_hash(const cl_fmap_t *map, const char *hash_alg, bool *have_hash_out);` - `cl_error_t cl_fmap_will_need_hash_later(const cl_fmap_t *map, const char *hash_alg);` - `cl_error_t cl_fmap_get_hash(const cl_fmap_t *map, const char *hash_alg, const char **hash_out);` - `cl_error_t cl_fmap_get_data(const cl_fmap_t *map, size_t offset, size_t len, const uint8_t **data_out, size_t *data_len_out);` - `cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out);` - `cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out);` - `cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out);` - `cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t 
*recursion_level_out);` - `cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out);` - `cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out);` - `cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out);` This commit deprecates but does not remove the existing scan callbacks: - `void cl_engine_set_clcb_pre_cache(struct cl_engine *engine, clcb_pre_cache callback);` - `void cl_engine_set_clcb_file_inspection(struct cl_engine *engine, clcb_file_inspection callback);` - `void cl_engine_set_clcb_pre_scan(struct cl_engine *engine, clcb_pre_scan callback);` - `void cl_engine_set_clcb_post_scan(struct cl_engine *engine, clcb_post_scan callback);` - `void cl_engine_set_clcb_virus_found(struct cl_engine *engine, clcb_virus_found callback);` - `void cl_engine_set_clcb_hash(struct cl_engine *engine, clcb_hash callback);` This commit also adds an interactive test program to demonstrate the callbacks. See: `examples/ex_scan_callbacks.c` CLAM-255 CLAM-2485 CLAM-2626
2025-06-22 14:37:03 -04:00
/*
* Copyright (C) 2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
*
* Authors: Valerie Snyder
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include "scan_layer.h"
#include "clamav_rust.h"
/**
 * @brief Retrieve the file map (fmap) that belongs to a scan layer.
 *
 * @param layer     The scan layer to read from. Must not be NULL.
 * @param fmap_out  [out] Receives the layer's fmap pointer. Must not be NULL.
 *                  Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success.
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_fmap(cl_scan_layer_t *layer, cl_fmap_t **fmap_out)
{
    cli_scan_layer_t *scan_layer = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == fmap_out) {
        return CL_ENULLARG;
    }

    /* The public handle is the internal layer structure under an opaque type. */
    scan_layer = (cli_scan_layer_t *)layer;
    *fmap_out  = scan_layer->fmap;

    return CL_SUCCESS;
}
/**
 * @brief Retrieve the parent of a scan layer.
 *
 * @param layer             The scan layer to read from. Must not be NULL.
 * @param parent_layer_out  [out] Receives the value of the layer's parent pointer,
 *                          exposed as an opaque public handle. Must not be NULL.
 *                          Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success.
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_parent_layer(cl_scan_layer_t *layer, cl_scan_layer_t **parent_layer_out)
{
    cli_scan_layer_t *scan_layer = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == parent_layer_out) {
        return CL_ENULLARG;
    }

    scan_layer = (cli_scan_layer_t *)layer;

    /* Hand the internal parent pointer back under the public handle type. */
    *parent_layer_out = (cl_scan_layer_t *)scan_layer->parent;

    return CL_SUCCESS;
}
/**
 * @brief Retrieve the file type name of a scan layer.
 *
 * This is the file type as clamav currently believes it to be.
 * It may change later in the scan, so consider using the `clcb_file_type_correction`
 * callback to access the file again if it is re-typed.
 *
 * @param layer     The scan layer to read from. Must not be NULL.
 * @param type_out  [out] Receives the string returned by cli_ftname() for the
 *                  layer's type. Must not be NULL.
 *                  Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success.
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_type(cl_scan_layer_t *layer, const char **type_out)
{
    cli_scan_layer_t *scan_layer = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == type_out) {
        return CL_ENULLARG;
    }

    scan_layer = (cli_scan_layer_t *)layer;

    /* Translate the internal type code into its name. */
    *type_out = cli_ftname(scan_layer->type);

    return CL_SUCCESS;
}
/**
 * @brief Retrieve the recursion level of a scan layer.
 *
 * @param layer                The scan layer to read from. Must not be NULL.
 * @param recursion_level_out  [out] Receives the layer's recursion level. Must not be NULL.
 *                             Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success.
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_recursion_level(cl_scan_layer_t *layer, uint32_t *recursion_level_out)
{
    cli_scan_layer_t *scan_layer = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == recursion_level_out) {
        return CL_ENULLARG;
    }

    scan_layer           = (cli_scan_layer_t *)layer;
    *recursion_level_out = scan_layer->recursion_level;

    return CL_SUCCESS;
}
/**
 * @brief Retrieve the object ID of a scan layer.
 *
 * The object ID is a unique identifier for the scan layer. It counts up from 0, although
 * the callback interface may skip some IDs if a scan layer is processed immediately rather
 * than being handled as a distinct file type.
 * For example, HTML may be normalized several ways and each normalized form is given an
 * object ID, but those forms are pattern matched immediately and are not handled as
 * distinct file types that were contained within the HTML.
 *
 * @param layer          The scan layer to read from. Must not be NULL.
 * @param object_id_out  [out] Receives the layer's object ID. Must not be NULL.
 *                       Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success.
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_object_id(cl_scan_layer_t *layer, uint64_t *object_id_out)
{
    cli_scan_layer_t *scan_layer = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == object_id_out) {
        return CL_ENULLARG;
    }

    scan_layer     = (cli_scan_layer_t *)layer;
    *object_id_out = scan_layer->object_id;

    return CL_SUCCESS;
}
/**
 * @brief Retrieve the name of the last detected alert (aka Strong indicator) for a scan layer.
 *
 * @param layer           The scan layer to read from. Must not be NULL.
 * @param alert_name_out  [out] Receives the alert name. Must not be NULL.
 *                        Set to NULL when the layer has no evidence record, or when
 *                        evidence_get_last_alert() returns NULL for it.
 *                        Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success (including when there is no alert).
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_last_alert(cl_scan_layer_t *layer, const char **alert_name_out)
{
    cli_scan_layer_t *scan_layer = NULL;
    const char *last_alert       = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == alert_name_out) {
        return CL_ENULLARG;
    }

    scan_layer = (cli_scan_layer_t *)layer;

    /* Only consult the evidence record if the layer has one; otherwise report "no alert". */
    if (NULL != scan_layer->evidence) {
        last_alert = evidence_get_last_alert(scan_layer->evidence);
    }

    /* A NULL result from the lookup is passed through unchanged. */
    *alert_name_out = last_alert;

    return CL_SUCCESS;
}
/**
 * @brief Retrieve the attributes of a scan layer.
 *
 * @param layer           The scan layer to read from. Must not be NULL.
 * @param attributes_out  [out] Receives the layer's attributes value. Must not be NULL.
 *                        Left untouched when an error is returned.
 * @return cl_error_t CL_SUCCESS on success.
 * @return cl_error_t CL_ENULLARG if either argument is NULL.
 */
extern cl_error_t cl_scan_layer_get_attributes(cl_scan_layer_t *layer, uint32_t *attributes_out)
{
    cli_scan_layer_t *scan_layer = NULL;

    /* Reject missing arguments before touching the layer. */
    if (NULL == layer || NULL == attributes_out) {
        return CL_ENULLARG;
    }

    scan_layer      = (cli_scan_layer_t *)layer;
    *attributes_out = scan_layer->attributes;

    return CL_SUCCESS;
}