[3.14] gh-145986: Avoid unbounded C recursion in conv_content_model in pyexpat.c (CVE-2026-4224) (GH-145987) (#145995)

gh-145986: Avoid unbounded C recursion in `conv_content_model` in `pyexpat.c` (CVE-2026-4224) (GH-145987)

Fix a C stack overflow (CVE-2026-4224) that occurs when an Expat parser
with a registered `ElementDeclHandler` parses an inline DTD
containing a deeply nested content model.

---------
(cherry picked from commit eb0e8be3a7)

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
Miss Islington (bot) 2026-03-15 23:13:58 +01:00 committed by GitHub
parent 64e2acbc8e
commit e0a8a6da90
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 1 deletions

View file

@ -689,6 +689,25 @@ def test_trigger_leak(self):
parser.ElementDeclHandler = lambda _1, _2: None
self.assertRaises(TypeError, parser.Parse, data, True)
# Regression test for gh-145986 (CVE-2026-4224): parsing an inline DTD whose
# content model is nested N levels deep, with an ElementDeclHandler set,
# must raise RecursionError.
# NOTE(review): the three decorators presumably skip the test on platforms
# where the C stack limit makes the check unreliable -- confirm in test.support.
@support.skip_if_unlimited_stack_size
@support.skip_emscripten_stack_overflow()
@support.skip_wasi_stack_overflow()
def test_deeply_nested_content_model(self):
# This should raise a RecursionError and not crash.
# See https://github.com/python/cpython/issues/145986.
# N is the number of nested "(a, ...)" groups in the content model.
N = 500_000
# Build a document whose internal DTD subset declares <root> with the
# content model "(a, (a, ... a) ... )": N opening groups, one innermost
# "a", then N closing parentheses, followed by an empty <root/> element.
data = (
b'<!DOCTYPE root [\n<!ELEMENT root '
+ b'(a, ' * N + b'a' + b')' * N
+ b'>\n]>\n<root/>\n'
)
parser = expat.ParserCreate()
# A no-op handler is sufficient: the issue concerns parsers that have an
# ElementDeclHandler registered, regardless of what the handler does.
parser.ElementDeclHandler = lambda _1, _2: None
# NOTE(review): support.infinite_recursion() presumably adjusts the
# recursion limit for the duration of the block -- confirm in test.support.
with support.infinite_recursion():
with self.assertRaises(RecursionError):
parser.Parse(data)
class MalformedInputTest(unittest.TestCase):
def test1(self):
xml = b"\0\r\n"

View file

@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: Fixed a crash caused by unbounded C recursion when
converting deeply nested XML content models with
:meth:`~xml.parsers.expat.xmlparser.ElementDeclHandler`.
This addresses :cve:`2026-4224`.

View file

@ -3,6 +3,7 @@
#endif
#include "Python.h"
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
#include "pycore_import.h" // _PyImport_SetModule()
#include "pycore_pyhash.h" // _Py_HashSecret
#include "pycore_traceback.h" // _PyTraceback_Add()
@ -603,6 +604,10 @@ static PyObject *
conv_content_model(XML_Content * const model,
PyObject *(*conv_string)(void *))
{
if (_Py_EnterRecursiveCall(" in conv_content_model")) {
return NULL;
}
PyObject *result = NULL;
PyObject *children = PyTuple_New(model->numchildren);
int i;
@ -614,7 +619,7 @@ conv_content_model(XML_Content * const model,
conv_string);
if (child == NULL) {
Py_XDECREF(children);
return NULL;
goto done;
}
PyTuple_SET_ITEM(children, i, child);
}
@ -622,6 +627,8 @@ conv_content_model(XML_Content * const model,
model->type, model->quant,
conv_string, model->name, children);
}
done:
_Py_LeaveRecursiveCall();
return result;
}