[3.13] gh-139400: Make sure that parent parsers outlive their subparsers in pyexpat (GH-139403) (GH-139608)

Within libexpat, a parser created via `XML_ExternalEntityParserCreate`
is relying on its parent parser throughout its entire lifetime.
Prior to this fix, it was possible for the parent parser to be
garbage-collected too early.

(cherry picked from commit 6edb2ddb5f)
This commit is contained in:
Sebastian Pipping 2025-10-06 15:17:37 +02:00 committed by GitHub
parent 1d39dba759
commit 69ab8fbb85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 65 additions and 0 deletions

View file

@ -755,6 +755,42 @@ def resolve_entity(context, base, system_id, public_id):
self.assertEqual(handler_call_args, [("bar", "baz")])
class ParentParserLifetimeTest(unittest.TestCase):
    """
    Lifetime checks for parsers that own external-entity subparsers.

    Inside of Expat, a subparser keeps using the XML_Parser of the parser
    it was created from, so every parent parser has to stay alive for as
    long as one of its subparsers exists.

    See https://github.com/python/cpython/issues/139400.
    """

    def test_parent_parser_outlives_its_subparsers__single(self):
        root = expat.ParserCreate()
        child = root.ExternalEntityParserCreate(None)

        # Drop the local reference to the parent while the subparser
        # derived from it is still alive; `child` is deliberately kept
        # (unused) until the test method returns.
        del root

    def test_parent_parser_outlives_its_subparsers__multiple(self):
        root = expat.ParserCreate()
        first_child = root.ExternalEntityParserCreate(None)
        second_child = root.ExternalEntityParserCreate(None)

        # Same scenario as above, but with two sibling subparsers that
        # were both created from the one parent being released here.
        del root

    def test_parent_parser_outlives_its_subparsers__chain(self):
        root = expat.ParserCreate()
        child = root.ExternalEntityParserCreate(None)
        grandchild = child.ExternalEntityParserCreate(None)

        # Release the two ancestors from the top of the chain downwards
        # while the innermost subparser is still alive.
        del root
        del child
class ReparseDeferralTest(unittest.TestCase):
def test_getter_setter_round_trip(self):
parser = expat.ParserCreate()

View file

@ -0,0 +1,4 @@
:mod:`xml.parsers.expat`: Make sure that parent Expat parsers are only
garbage-collected once they are no longer referenced by subparsers created
by :meth:`~xml.parsers.expat.xmlparser.ExternalEntityParserCreate`.
Patch by Sebastian Pipping.

View file

@ -74,6 +74,15 @@ typedef struct {
PyObject_HEAD
XML_Parser itself;
/*
* Strong reference to a parent `xmlparseobject` if this parser
* is a child parser. Set to NULL if this parser is a root parser.
* This is needed to keep the parent parser alive as long as it has
* at least one child parser.
*
* See https://github.com/python/cpython/issues/139400 for details.
*/
PyObject *parent;
int ordered_attributes; /* Return attributes as a list. */
int specified_attributes; /* Report only specified attributes. */
int in_callback; /* Is a callback active? */
@ -988,6 +997,11 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
return NULL;
}
// The new subparser will make use of the parent XML_Parser inside of Expat.
// So we need to take subparsers into account with the reference counting
// of their parent parser.
Py_INCREF(self);
new_parser->buffer_size = self->buffer_size;
new_parser->buffer_used = 0;
new_parser->buffer = NULL;
@ -997,6 +1011,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->ns_prefixes = self->ns_prefixes;
new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
encoding);
new_parser->parent = (PyObject *)self;
new_parser->handlers = 0;
new_parser->intern = Py_XNewRef(self->intern);
@ -1004,11 +1019,13 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
if (new_parser->buffer == NULL) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
}
if (!new_parser->itself) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
@ -1021,6 +1038,7 @@ pyexpat_xmlparser_ExternalEntityParserCreate_impl(xmlparseobject *self,
new_parser->handlers = PyMem_New(PyObject *, i);
if (!new_parser->handlers) {
Py_DECREF(new_parser);
Py_DECREF(self);
return PyErr_NoMemory();
}
clear_handlers(new_parser, 1);
@ -1210,6 +1228,7 @@ newxmlparseobject(pyexpat_state *state, const char *encoding,
/* namespace_separator is either NULL or contains one char + \0 */
self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
namespace_separator);
self->parent = NULL;
if (self->itself == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"XML_ParserCreate failed");
@ -1245,6 +1264,7 @@ xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
for (int i = 0; handler_info[i].name != NULL; i++) {
Py_VISIT(op->handlers[i]);
}
Py_VISIT(op->parent);
Py_VISIT(Py_TYPE(op));
return 0;
}
@ -1254,6 +1274,10 @@ xmlparse_clear(xmlparseobject *op)
{
clear_handlers(op, 0);
Py_CLEAR(op->intern);
// NOTE: We cannot call Py_CLEAR(op->parent) prior to calling
// XML_ParserFree(op->itself), or a subparser could lose its parent
// XML_Parser while still making use of it internally.
// https://github.com/python/cpython/issues/139400
return 0;
}
@ -1265,6 +1289,7 @@ xmlparse_dealloc(xmlparseobject *self)
if (self->itself != NULL)
XML_ParserFree(self->itself);
self->itself = NULL;
Py_CLEAR(self->parent);
if (self->handlers != NULL) {
PyMem_Free(self->handlers);