clamav/unit_tests/clamscan/save_html_uris_test.py
John Humlick e1e3d4c64d
libclamav: Add URI scanning support to PDF parser
Threat Research requests scanning URIs in PDF files and adding them to
the json report file.

This change adds URI scanning support to the PDF parser, including
support for object references to URIs in PDF files.

Jira: CLAM-2588

Fix out-of-order references and other minor improvements.

CLAM-2588, CLAM-2757
2025-05-30 12:41:17 -07:00

60 lines
1.7 KiB
Python

# Copyright (C) 2020-2025 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
"""
Run clamscan tests.
"""
import sys
import os
import re
import shutil
sys.path.append('../unit_tests')
import testcase
class TC(testcase.TestCase):
    """clamscan test: URIs found in an HTML file must show up in the JSON metadata."""

    @classmethod
    def setUpClass(cls):
        """Run the base test case's class-level setup."""
        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Run the base test case's class-level teardown."""
        super().tearDownClass()

    def setUp(self):
        """Run the base test case's per-test setup."""
        super().setUp()

    def tearDown(self):
        """Clean up the scan temp directory, then check the valgrind log."""
        super().tearDown()

        # Remove scan temps directory between tests
        scan_temps = self.path_tmp / "TD"
        if scan_temps.exists():
            shutil.rmtree(scan_temps)

        self.verify_valgrind_log()

    def test_save_links(self):
        """Scan the sample index.html with --gen-json and look for its URIs.

        The scan is run with --leave-temps and --tempdir so that the files
        clamscan writes stay available under the "TD" directory after the
        command returns.
        """
        self.step_name('Extract Links')

        # Directory handed to clamscan via --tempdir; create it on first use.
        tempdir = self.path_tmp / "TD"
        os.makedirs(tempdir, exist_ok=True)

        inputs = TC.path_source / 'unit_tests' / 'input'
        testfile = inputs / 'other_scanfiles' / 'html' / 'index.html'
        signature_db = inputs / 'other_sigs' / 'Clamav-Unit-Test-Signature.ndb'

        template = '{valgrind} {valgrind_args} {clamscan} -d {path_db} --gen-json --leave-temps --tempdir={tempdir} {testfile}'
        command = template.format(
            valgrind=TC.valgrind,
            valgrind_args=TC.valgrind_args,
            clamscan=TC.clamscan,
            path_db=signature_db,
            tempdir=tempdir,
            testfile=testfile,
        )
        output = self.execute_command(command)

        assert output.ec == 0  # clean

        # Strings handed to verify_metadata_json; presumably each must appear
        # in the metadata JSON left in tempdir -- confirm against the harness.
        expected_strings = [
            'URIs',
            '"https://www.clamav.net/reports/malware"',
            '"http://www.google.com"',
        ]
        self.verify_metadata_json(tempdir, expected_strings)