clamav/unit_tests/clamscan/embedded_files_test.py
Val S. a77a271fb5
Reduce unnecessary scanning of embedded file FPs (#1571)
When embedded file type recognition finds a possible embedded file, it
is being scanned as a new embedded file even if it turns out it was a
false positive and parsing fails. My solution is to pre-parse the file
headers as little as possible to determine if it is valid. If possible,
also determine the file size based on the headers. That will make it so
we don't have to scan additional data when the embedded file is not at
the very end.

This commit adds header checks prior to embedded ZIP, ARJ, and CAB
scanning. For these types I was also able to use the header checks to
determine the object size so as to prevent excessive pattern matching.

TODO: Add the same for RAR, EGG, 7Z, NULSFT, AUTOIT, IShield, and PDF.

This commit also removes duplicate matching for embedded MSEXE.
The embedded MSEXE detection and scanning logic was accidentally
creating an extra duplicate layer in between scanning and detection
because of the logic within the `cli_scanembpe()` function.
That function was effectively doing the header check which this commit
adds for ZIP, ARJ, and CAB but minus the size check.
Note: It is unfortunately not possible to get an accurate size from PE
file headers.
The `cli_scanembpe()` function also used to dump to a temp file for no
reason since FMAPs were extended to support windows into other FMAPs.
So this commit removes the intermediate layer as well as dropping a temp
file for each embedded PE file.

Further, this commit adds configuration and DCONF safeguards around all
embedded file type scanning.

Finally, this commit adds a set of tests to validate proper extraction
of embedded ZIP, ARJ, CAB, and MSEXE files.

CLAM-2862

Co-authored-by: TheRaynMan <draynor@sourcefire.com>
2025-09-23 15:57:28 -04:00

130 lines
5.1 KiB
Python

# Copyright (C) 2020-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
"""
Run clamscan tests.
"""
import sys
from zipfile import ZIP_DEFLATED, ZipFile
sys.path.append('../unit_tests')
import testcase
class TC(testcase.TestCase):
    """Check that clamscan alerts on files embedded inside other files.

    Every test scans one sample from
    ``unit_tests/input/embedded_testfiles`` with the signature database
    stored alongside it, and checks that each expected signature is
    reported as ``FOUND``.
    """

    @classmethod
    def setUpClass(cls):
        super(TC, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        super(TC, cls).tearDownClass()

    def setUp(self):
        super(TC, self).setUp()

    def tearDown(self):
        super(TC, self).tearDown()
        # Check the valgrind log after every test.
        self.verify_valgrind_log()

    def _scan_embedded_sample(self, sample_name, expected_signatures):
        """Scan one embedded-files sample and verify the reported signatures.

        Args:
            sample_name: File name of the sample inside
                ``unit_tests/input/embedded_testfiles``.
            expected_signatures: Signature names (without the
                ``.UNOFFICIAL FOUND`` suffix) expected in the scan output.
        """
        embedded_dir = TC.path_source / 'unit_tests' / 'input' / 'embedded_testfiles'
        path_db = embedded_dir / 'signatures'
        testfiles = embedded_dir / sample_name

        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfiles} --gen-json --debug --allmatch'.format(
            valgrind=TC.valgrind,
            valgrind_args=TC.valgrind_args,
            clamscan=TC.clamscan,
            path_db=path_db,
            testfiles=testfiles,
        )
        output = self.execute_command(command)

        # clamscan exits with 1 when at least one virus was found, which is
        # the outcome these tests expect (0 = clean, 2 = error).
        assert output.ec == 1  # virus(es) found

        expected_stdout = [
            '{}: {}.UNOFFICIAL FOUND'.format(sample_name, signature)
            for signature in expected_signatures
        ]
        # The sample must not be reported as clean.
        unexpected_stdout = [
            'OK',
        ]
        self.verify_output(output.out, expected=expected_stdout, unexpected=unexpected_stdout)

    def test_embedded_zips(self):
        """Scan a sample carrying embedded ZIP files."""
        self.step_name('Test that clamav can successfully extract and alert on multiple embedded ZIP files')
        self._scan_embedded_sample(
            'test.png.emb-zips',
            [
                'test-file-1-1',
                'test-file-1-2',
                'test-file-2-1',
                'test-file-2-2',
            ],
        )

    def test_embedded_arjs(self):
        """Scan a sample carrying embedded ARJ files."""
        self.step_name('Test that clamav can successfully extract and alert on multiple embedded ARJ files')
        self._scan_embedded_sample(
            'test.png.emb-arjs',
            [
                'test-file-1-1',
                'test-file-1-2',
                'test-file-2-1',
                'test-file-2-2',
            ],
        )

    def test_embedded_cabs(self):
        """Scan a sample carrying embedded CAB files."""
        self.step_name('Test that clamav can successfully extract and alert on multiple embedded CAB files')
        self._scan_embedded_sample(
            'test.png.emb-cabs',
            [
                'test-file-1-1',
                'test-file-1-2',
                'test-file-2-1',
                'test-file-2-2',
            ],
        )

    def test_embedded_exes(self):
        """Scan a sample carrying embedded EXE files."""
        self.step_name('Test that clamav can successfully extract and alert on multiple embedded EXE files')
        self._scan_embedded_sample(
            'clam.exe.emb-exes',
            [
                'Win.Test.LilEXE',
                'Win.Test.SmolEXE',
            ],
        )