gh-148441: Avoid integer overflow in Expat's CharacterDataHandler (#148904)

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
ByteFlow 2026-05-10 21:42:04 +08:00 committed by GitHub
parent a2c1657cab
commit bc1be4f617
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 19 additions and 1 deletions

View file

@ -712,6 +712,20 @@ def test_change_size_2(self):
parser.Parse(xml2, True)
self.assertEqual(self.n, 4)
@support.requires_resource('cpu')
@support.requires_resource('walltime')
@support.bigmemtest(size=2**31, memuse=4, dry_run=False)
def test_large_character_data_does_not_crash(self, size):
    # Regression test: buffering more than INT_MAX bytes of character data
    # must not crash the parser.
    # See https://github.com/python/cpython/issues/148441
    #
    # bigmemtest invokes the test as f(self, size), so the method has to
    # accept the extra argument; without it the test fails with TypeError
    # as soon as it is not skipped.  The payload length is fixed rather
    # than derived from 'size' because it must exceed INT_MAX.
    parser = expat.ParserCreate()
    parser.buffer_text = True
    parser.buffer_size = 2**31 - 1  # INT_MAX
    N = 2049 * (1 << 20) - 3  # Character data greater than INT_MAX
    self.assertGreater(N, parser.buffer_size)
    # A handler must be installed so that character data is delivered to
    # the handler under test.
    parser.CharacterDataHandler = lambda text: None
    xml_data = b"<r>" + b"A" * N + b"</r>"
    # Parse() returns 1 on success; reaching this assertion at all means
    # the interpreter survived the oversized text node.
    self.assertEqual(parser.Parse(xml_data, True), 1)
class ElementDeclHandlerTest(unittest.TestCase):
def test_trigger_leak(self):
# Unfixed, this test would leak the memory of the so-called

View file

@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: prevent a crash caused by an integer overflow in
:meth:`~xml.parsers.expat.xmlparser.CharacterDataHandler` when
:attr:`~xml.parsers.expat.xmlparser.buffer_text` is enabled and very large
character data is checked against the parser's :attr:`buffer size <xml.parsers.expat.xmlparser.buffer_size>`.

View file

@ -393,7 +393,7 @@ my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
if (self->buffer == NULL)
call_character_handler(self, data, len);
else {
if ((self->buffer_used + len) > self->buffer_size) {
if (len > (self->buffer_size - self->buffer_used)) {
if (flush_character_buffer(self) < 0)
return;
/* handler might have changed; drop the rest on the floor