gh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (#139403)

* Modules/pyexpat.c: Disallow collection of in-use parent parsers.

Within libexpat, a parser created via `XML_ExternalEntityParserCreate`
is relying on its parent parser throughout its entire lifetime.
Prior to this fix, it was possible for the parent parser to be
garbage-collected too early.
This commit is contained in:
Sebastian Pipping 2025-10-05 17:37:42 +02:00 committed by GitHub
parent 13dc2fde8c
commit 6edb2ddb5f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 65 additions and 0 deletions

View file

@ -771,6 +771,42 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])
class ParentParserLifetimeTest(unittest.TestCase):
    """
    Inside of Expat, a subparser keeps using the XML_Parser of the parser
    it was created from, so every parent parser has to stay alive at least
    as long as its subparsers do.

    See https://github.com/python/cpython/issues/139400.
    """

    def test_parent_parser_outlives_its_subparsers__single(self):
        root = expat.ParserCreate()
        child = root.ExternalEntityParserCreate(None)

        # Drop the local name of the parent while one subparser is still
        # bound, in an attempt to get the parent collected too early.
        del root

    def test_parent_parser_outlives_its_subparsers__multiple(self):
        root = expat.ParserCreate()
        first_child = root.ExternalEntityParserCreate(None)
        second_child = root.ExternalEntityParserCreate(None)

        # Same attempt as above, but with two sibling subparsers that were
        # both created from the same parent.
        del root

    def test_parent_parser_outlives_its_subparsers__chain(self):
        root = expat.ParserCreate()
        child = root.ExternalEntityParserCreate(None)
        grandchild = child.ExternalEntityParserCreate(None)

        # Release both ancestors (outermost first) while the innermost
        # subparser of the chain is still bound.
        del root, child
class ReparseDeferralTest(unittest.TestCase):
def test_getter_setter_round_trip(self):
parser = expat.ParserCreate()

View file

@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
garbage-collected once they are no longer referenced by subparsers created
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
Patch by Sebastian Pipping.

View file

@ -76,6 +76,15 @@ typedef struct {
PyObject_HEAD
XML_Parser itself;
/*
* Strong reference to a parent `xmlparseobject` if this parser
* is a child parser. Set to NULL if this parser is a root parser.
* This is needed to keep the parent parser alive as long as it has
* at least one child parser.
*
* See https://github.com/python/cpython/issues/139400 for details.
*/
PyObject *parent;
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
@ -1067,6 +1076,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
return NULL;
}
// The new subparser will make use of the parent XML_Parser inside of Expat.
// So we need to take subparsers into account with the reference counting
// of their parent parser.
Py_INCREF(self);
new_parser->buffer_size = self->buffer_size;
new_parser->buffer_used = 0;
new_parser->buffer = NULL;
@ -1076,6 +1090,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->ns_prefixes = self->ns_prefixes;
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
new_parser->parent = (PyObject *)self;
new_parser->handlers = 0;
new_parser->intern = Py_XNewRef(self->intern);
@ -1083,11 +1098,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
if (new_parser->buffer == NULL) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
}
if (!new_parser->itself) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
@ -1101,6 +1118,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->handlers = PyMem_New(PyObject *, i);
if (!new_parser->handlers) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
clear_handlers(new_parser, 1);
@ -1481,6 +1499,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
namespace_separator);
self->parent = NULL;
if (self->itself == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"XML_ParserCreate failed");
@ -1517,6 +1536,7 @@ xmlparse_traverse(PyObject *op, visitproc visit, void *arg)
for (size_t i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(self->handlers[i]);
}
Py_VISIT(self->parent);
Py_VISIT(Py_TYPE(op));
return 0;
}
@ -1527,6 +1547,10 @@ xmlparse_clear(PyObject *op)
xmlparseobject *self = xmlparseobject_CAST(op);
clear_handlers(self, 0);
Py_CLEAR(self->intern);
// NOTE: We cannot call Py_CLEAR(self->parent) prior to calling
// XML_ParserFree(self->itself), or a subparser could lose its parent
// XML_Parser while still making use of it internally.
// https://github.com/python/cpython/issues/139400
return 0;
}
@ -1540,6 +1564,7 @@ xmlparse_dealloc(PyObject *op)
XML_ParserFree(self->itself);
}
self->itself = NULL;
Py_CLEAR(self->parent);
if (self->handlers != NULL) {
PyMem_Free(self->handlers);