[3.14] gh-90949: add Expat API to prevent XML deadly allocations (CVE-2025-59375) (GH-139234) (#139359)

* [3.14] gh-90949: add Expat API to prevent XML deadly allocations (CVE-2025-59375) (GH-139234)

Expose the XML Expat 2.7.2 mitigation APIs to disallow use of
disproportional amounts of dynamic memory from within an Expat
parser (see CVE-2025-59375 for instance).

The exposed APIs are available on Expat parsers, that is,
parsers created by `xml.parsers.expat.ParserCreate()`, as:

- `parser.SetAllocTrackerActivationThreshold(threshold)`, and
- `parser.SetAllocTrackerMaximumAmplification(max_factor)`.

(cherry picked from commit f04bea44c3)
(cherry picked from commit 68a1778b77)
This commit is contained in:
Bénédikt Tran 2025-11-02 10:33:36 +01:00 committed by GitHub
parent 4d7fab9b15
commit bf2865f80f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 586 additions and 32 deletions

View file

@ -1,7 +1,10 @@
# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.
import abc
import functools
import os
import re
import sys
import sysconfig
import textwrap
@ -9,7 +12,7 @@
import traceback
from io import BytesIO
from test import support
from test.support import os_helper
from test.support import import_helper, os_helper
from xml.parsers import expat
from xml.parsers.expat import errors
@ -863,5 +866,199 @@ def start_element(name, _):
self.assertEqual(started, ['doc'])
class AttackProtectionTestBase(abc.ABC):
    """
    Base class for testing protections against XML payloads with
    disproportionate amplification.

    The protections being tested should detect and prevent attacks
    that leverage disproportionate amplification from small inputs.

    Concrete subclasses supply the protection-specific setters
    (set_activation_threshold(), set_maximum_amplification()), the
    rejection check (assert_rejected()) and the abstract scenario tests.
    """

    @staticmethod
    def exponential_expansion_payload(*, nrows, ncols, text='.'):
        """Create a billion laughs attack payload.

        The document declares the entities row0 .. row{nrows}: row0 holds
        *text* and every following row references the previous one *ncols*
        times. The root element references the last row.

        Be careful: fully expanding the payload yields pow(ncols, nrows)
        copies of *text*, thereby requiring at least
        pow(ncols, nrows) * len(text) memory!

        *text* is inserted verbatim (it is not XML-escaped), so it may
        contain any character that is valid inside a double-quoted
        entity value, including '{' and '}'.
        """
        # The document is assembled line by line rather than through
        # str.format() on an already interpolated template: formatting
        # a string that embeds *text* would re-interpret any brace in it
        # as a replacement field.
        body = '\n'.join(
            f'    <!ENTITY row{i + 1} "{f"&row{i};" * ncols}">'
            for i in range(nrows)
        )
        return '\n'.join([
            '<?xml version="1.0"?>',
            '<!DOCTYPE doc [',
            f'    <!ENTITY row0 "{text}">',
            '    <!ELEMENT doc (#PCDATA)>',
            body,
            ']>',
            f'<doc>&row{nrows};</doc>',
        ])

    def test_payload_generation(self):
        # self-test for exponential_expansion_payload()
        payload = self.exponential_expansion_payload(nrows=2, ncols=3)
        self.assertEqual(payload, textwrap.dedent("""\
            <?xml version="1.0"?>
            <!DOCTYPE doc [
                <!ENTITY row0 ".">
                <!ELEMENT doc (#PCDATA)>
                <!ENTITY row1 "&row0;&row0;&row0;">
                <!ENTITY row2 "&row1;&row1;&row1;">
            ]>
            <doc>&row2;</doc>
        """).rstrip())

    def assert_root_parser_failure(self, func, /, *args, **kwargs):
        """Check that func(*args, **kwargs) is invalid for a sub-parser."""
        msg = "parser must be a root parser"
        self.assertRaisesRegex(expat.ExpatError, msg, func, *args, **kwargs)

    @abc.abstractmethod
    def assert_rejected(self, func, /, *args, **kwargs):
        """Assert that func(*args, **kwargs) triggers the attack protection.

        Note: this method must ensure that the attack protection being tested
        is the one that is actually triggered at runtime, e.g., by matching
        the exact error message.
        """

    @abc.abstractmethod
    def set_activation_threshold(self, parser, threshold):
        """Set the activation threshold for the tested protection."""

    @abc.abstractmethod
    def set_maximum_amplification(self, parser, max_factor):
        """Set the maximum amplification factor for the tested protection."""

    @abc.abstractmethod
    def test_set_activation_threshold__threshold_reached(self):
        """Test when the activation threshold is exceeded."""

    @abc.abstractmethod
    def test_set_activation_threshold__threshold_not_reached(self):
        """Test when the activation threshold is not exceeded."""

    def test_set_activation_threshold__invalid_threshold_type(self):
        parser = expat.ParserCreate()
        setter = functools.partial(self.set_activation_threshold, parser)

        # Floats are rejected whatever their sign.
        self.assertRaises(TypeError, setter, 1.0)
        self.assertRaises(TypeError, setter, -1.5)
        # A negative integer has the right type but is not a valid value.
        self.assertRaises(ValueError, setter, -5)

    def test_set_activation_threshold__invalid_threshold_range(self):
        # _testcapi exposes ULLONG_MAX; the test is skipped without it.
        _testcapi = import_helper.import_module("_testcapi")
        parser = expat.ParserCreate()
        setter = functools.partial(self.set_activation_threshold, parser)

        self.assertRaises(OverflowError, setter, _testcapi.ULLONG_MAX + 1)

    def test_set_activation_threshold__fail_for_subparser(self):
        parser = expat.ParserCreate()
        subparser = parser.ExternalEntityParserCreate(None)
        setter = functools.partial(self.set_activation_threshold, subparser)
        self.assert_root_parser_failure(setter, 12345)

    @abc.abstractmethod
    def test_set_maximum_amplification__amplification_exceeded(self):
        """Test when the amplification factor is exceeded."""

    @abc.abstractmethod
    def test_set_maximum_amplification__amplification_not_exceeded(self):
        """Test when the amplification factor is not exceeded."""

    def test_set_maximum_amplification__infinity(self):
        inf = float('inf')  # an 'inf' factor is allowed by Expat
        parser = expat.ParserCreate()
        self.assertIsNone(self.set_maximum_amplification(parser, inf))

    def test_set_maximum_amplification__invalid_max_factor_type(self):
        parser = expat.ParserCreate()
        setter = functools.partial(self.set_maximum_amplification, parser)

        self.assertRaises(TypeError, setter, None)
        self.assertRaises(TypeError, setter, 'abc')

    def test_set_maximum_amplification__invalid_max_factor_range(self):
        parser = expat.ParserCreate()
        setter = functools.partial(self.set_maximum_amplification, parser)

        # Both NaN and factors below 1.0 are reported with the same message.
        msg = re.escape("'max_factor' must be at least 1.0")
        self.assertRaisesRegex(expat.ExpatError, msg, setter, float('nan'))
        self.assertRaisesRegex(expat.ExpatError, msg, setter, 0.99)

    def test_set_maximum_amplification__fail_for_subparser(self):
        parser = expat.ParserCreate()
        subparser = parser.ExternalEntityParserCreate(None)
        setter = functools.partial(self.set_maximum_amplification, subparser)
        self.assert_root_parser_failure(setter, 123.45)
@unittest.skipIf(expat.version_info < (2, 7, 2), "requires Expat >= 2.7.2")
class MemoryProtectionTest(AttackProtectionTestBase, unittest.TestCase):
    # Exercises the allocation tracker protection through
    # SetAllocTrackerActivationThreshold() and
    # SetAllocTrackerMaximumAmplification().
    #
    # NOTE: with the default Expat configuration, the billion laughs
    # protection may trigger before the allocation limiter when
    # exponential_expansion_payload() is not carefully parametrized.
    # The payloads below are therefore chosen so that either the
    # allocation limiter fires first, or no protection fires at all.

    def assert_rejected(self, func, /, *args, **kwargs):
        """Check that func(*args, **kwargs) hits the allocation limit."""
        pattern = r"out of memory: line \d+, column \d+"
        with self.assertRaisesRegex(expat.ExpatError, pattern):
            func(*args, **kwargs)

    def set_activation_threshold(self, parser, threshold):
        setter = parser.SetAllocTrackerActivationThreshold
        return setter(threshold)

    def set_maximum_amplification(self, parser, max_factor):
        setter = parser.SetAllocTrackerMaximumAmplification
        return setter(max_factor)

    def test_set_activation_threshold__threshold_reached(self):
        root = expat.ParserCreate()
        # A tiny threshold: it is expected to be reached on every run.
        self.set_activation_threshold(root, 3)
        # With the smallest accepted factor, the payload below is expected
        # to exceed the allowed amplification once the threshold is reached.
        outcome = self.set_maximum_amplification(root, 1.0)
        self.assertIsNone(outcome)
        document = self.exponential_expansion_payload(ncols=10, nrows=4)
        self.assert_rejected(root.Parse, document, True)

    def test_set_activation_threshold__threshold_not_reached(self):
        root = expat.ParserCreate()
        # A large threshold: it is expected never to be reached here.
        self.set_activation_threshold(root, 10 ** 5)
        # Same factor and payload as in the "threshold reached" test:
        # since the threshold is not reached, parsing is expected to succeed.
        outcome = self.set_maximum_amplification(root, 1.0)
        self.assertIsNone(outcome)
        document = self.exponential_expansion_payload(ncols=10, nrows=4)
        status = root.Parse(document, True)
        self.assertIsNotNone(status)

    def test_set_maximum_amplification__amplification_exceeded(self):
        root = expat.ParserCreate()
        # A zero threshold makes the maximum amplification factor
        # apply unconditionally.
        self.set_activation_threshold(root, 0)
        # Smallest accepted factor: expected to always be exceeded.
        outcome = self.set_maximum_amplification(root, 1.0)
        self.assertIsNone(outcome)
        # Payload whose peak amplification factor is expected to be > 1.0.
        document = self.exponential_expansion_payload(ncols=1, nrows=2)
        self.assert_rejected(root.Parse, document, True)

    def test_set_maximum_amplification__amplification_not_exceeded(self):
        root = expat.ParserCreate()
        # A zero threshold makes the maximum amplification factor
        # apply unconditionally.
        self.set_activation_threshold(root, 0)
        # Very large factor: expected never to be exceeded.
        outcome = self.set_maximum_amplification(root, 1e4)
        self.assertIsNone(outcome)
        # Payload whose peak amplification factor is expected to be < 1e4.
        document = self.exponential_expansion_payload(ncols=1, nrows=2)
        status = root.Parse(document, True)
        self.assertIsNotNone(status)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()