mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	gh-126033: fix UAF in xml.etree.ElementTree.Element.remove when concurrent mutations happen (#126124)
				
					
				
			This commit is contained in:
		
							parent
							
								
									c57623c221
								
							
						
					
					
						commit
						bab1398a47
					
				
					 3 changed files with 214 additions and 37 deletions
				
			
		|  | @ -18,9 +18,11 @@ | ||||||
| import textwrap | import textwrap | ||||||
| import types | import types | ||||||
| import unittest | import unittest | ||||||
|  | import unittest.mock as mock | ||||||
| import warnings | import warnings | ||||||
| import weakref | import weakref | ||||||
| 
 | 
 | ||||||
|  | from contextlib import nullcontext | ||||||
| from functools import partial | from functools import partial | ||||||
| from itertools import product, islice | from itertools import product, islice | ||||||
| from test import support | from test import support | ||||||
|  | @ -121,6 +123,21 @@ | ||||||
| </foo> | </foo> | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
|  | def is_python_implementation(): | ||||||
|  |     assert ET is not None, "ET must be initialized" | ||||||
|  |     assert pyET is not None, "pyET must be initialized" | ||||||
|  |     return ET is pyET | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def equal_wrapper(cls): | ||||||
|  |     """Mock cls.__eq__ to check whether it has been called or not. | ||||||
|  | 
 | ||||||
|  |     The behaviour of cls.__eq__ (side-effects included) is left as is. | ||||||
|  |     """ | ||||||
|  |     eq = cls.__eq__ | ||||||
|  |     return mock.patch.object(cls, "__eq__", autospec=True, wraps=eq) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def checkwarnings(*filters, quiet=False): | def checkwarnings(*filters, quiet=False): | ||||||
|     def decorator(test): |     def decorator(test): | ||||||
|         def newtest(*args, **kwargs): |         def newtest(*args, **kwargs): | ||||||
|  | @ -2642,6 +2659,7 @@ def test_pickle_issue18997(self): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class BadElementTest(ElementTestCase, unittest.TestCase): | class BadElementTest(ElementTestCase, unittest.TestCase): | ||||||
|  | 
 | ||||||
|     def test_extend_mutable_list(self): |     def test_extend_mutable_list(self): | ||||||
|         class X: |         class X: | ||||||
|             @property |             @property | ||||||
|  | @ -2680,18 +2698,168 @@ class Y(X, ET.Element): | ||||||
|         e = ET.Element('foo') |         e = ET.Element('foo') | ||||||
|         e.extend(L) |         e.extend(L) | ||||||
| 
 | 
 | ||||||
|     def test_remove_with_mutating(self): |     def test_remove_with_clear_assume_missing(self): | ||||||
|         class X(ET.Element): |         # gh-126033: Check that a concurrent clear() for an assumed-to-be | ||||||
|             def __eq__(self, o): |         # missing element does not make the interpreter crash. | ||||||
|                 del e[:] |         self.do_test_remove_with_clear(raises=True) | ||||||
|                 return False |  | ||||||
|         e = ET.Element('foo') |  | ||||||
|         e.extend([X('bar')]) |  | ||||||
|         self.assertRaises(ValueError, e.remove, ET.Element('baz')) |  | ||||||
| 
 | 
 | ||||||
|         e = ET.Element('foo') |     def test_remove_with_clear_assume_existing(self): | ||||||
|         e.extend([ET.Element('bar')]) |         # gh-126033: Check that a concurrent clear() for an assumed-to-be | ||||||
|         self.assertRaises(ValueError, e.remove, X('baz')) |         # existing element does not make the interpreter crash. | ||||||
|  |         self.do_test_remove_with_clear(raises=False) | ||||||
|  | 
 | ||||||
|  |     def do_test_remove_with_clear(self, *, raises): | ||||||
|  | 
 | ||||||
|  |         # Until the discrepency between "del root[:]" and "root.clear()" is | ||||||
|  |         # resolved, we need to keep two tests. Previously, using "del root[:]" | ||||||
|  |         # did not crash with the reproducer of gh-126033 while "root.clear()" | ||||||
|  |         # did. | ||||||
|  | 
 | ||||||
|  |         class E(ET.Element): | ||||||
|  |             """Local class to be able to mock E.__eq__ for introspection.""" | ||||||
|  | 
 | ||||||
|  |         class X(E): | ||||||
|  |             def __eq__(self, o): | ||||||
|  |                 del root[:] | ||||||
|  |                 return not raises | ||||||
|  | 
 | ||||||
|  |         class Y(E): | ||||||
|  |             def __eq__(self, o): | ||||||
|  |                 root.clear() | ||||||
|  |                 return not raises | ||||||
|  | 
 | ||||||
|  |         if raises: | ||||||
|  |             get_checker_context = lambda: self.assertRaises(ValueError) | ||||||
|  |         else: | ||||||
|  |             get_checker_context = nullcontext | ||||||
|  | 
 | ||||||
|  |         self.assertIs(E.__eq__, object.__eq__) | ||||||
|  | 
 | ||||||
|  |         for Z, side_effect in [(X, 'del root[:]'), (Y, 'root.clear()')]: | ||||||
|  |             self.enterContext(self.subTest(side_effect=side_effect)) | ||||||
|  | 
 | ||||||
|  |             # test removing R() from [U()] | ||||||
|  |             for R, U, description in [ | ||||||
|  |                 (E, Z, "remove missing E() from [Z()]"), | ||||||
|  |                 (Z, E, "remove missing Z() from [E()]"), | ||||||
|  |                 (Z, Z, "remove missing Z() from [Z()]"), | ||||||
|  |             ]: | ||||||
|  |                 with self.subTest(description): | ||||||
|  |                     root = E('top') | ||||||
|  |                     root.extend([U('one')]) | ||||||
|  |                     with get_checker_context(): | ||||||
|  |                         root.remove(R('missing')) | ||||||
|  | 
 | ||||||
|  |             # test removing R() from [U(), V()] | ||||||
|  |             cases = self.cases_for_remove_missing_with_mutations(E, Z) | ||||||
|  |             for R, U, V, description in cases: | ||||||
|  |                 with self.subTest(description): | ||||||
|  |                     root = E('top') | ||||||
|  |                     root.extend([U('one'), V('two')]) | ||||||
|  |                     with get_checker_context(): | ||||||
|  |                         root.remove(R('missing')) | ||||||
|  | 
 | ||||||
|  |             # Test removing root[0] from [Z()]. | ||||||
|  |             # | ||||||
|  |             # Since we call root.remove() with root[0], Z.__eq__() | ||||||
|  |             # will not be called (we branch on the fast Py_EQ path). | ||||||
|  |             with self.subTest("remove root[0] from [Z()]"): | ||||||
|  |                 root = E('top') | ||||||
|  |                 root.append(Z('rem')) | ||||||
|  |                 with equal_wrapper(E) as f, equal_wrapper(Z) as g: | ||||||
|  |                     root.remove(root[0]) | ||||||
|  |                 f.assert_not_called() | ||||||
|  |                 g.assert_not_called() | ||||||
|  | 
 | ||||||
|  |             # Test removing root[1] (of type R) from [U(), R()]. | ||||||
|  |             is_special = is_python_implementation() and raises and Z is Y | ||||||
|  |             if is_python_implementation() and raises and Z is Y: | ||||||
|  |                 # In pure Python, using root.clear() sets the children | ||||||
|  |                 # list to [] without calling list.clear(). | ||||||
|  |                 # | ||||||
|  |                 # For this reason, the call to root.remove() first | ||||||
|  |                 # checks root[0] and sets the children list to [] | ||||||
|  |                 # since either root[0] or root[1] is an evil element. | ||||||
|  |                 # | ||||||
|  |                 # Since checking root[1] still uses the old reference | ||||||
|  |                 # to the children list, PyObject_RichCompareBool() branches | ||||||
|  |                 # to the fast Py_EQ path and Y.__eq__() is called exactly | ||||||
|  |                 # once (when checking root[0]). | ||||||
|  |                 continue | ||||||
|  |             else: | ||||||
|  |                 cases = self.cases_for_remove_existing_with_mutations(E, Z) | ||||||
|  |                 for R, U, description in cases: | ||||||
|  |                     with self.subTest(description): | ||||||
|  |                         root = E('top') | ||||||
|  |                         root.extend([U('one'), R('rem')]) | ||||||
|  |                         with get_checker_context(): | ||||||
|  |                             root.remove(root[1]) | ||||||
|  | 
 | ||||||
|  |     def test_remove_with_mutate_root_assume_missing(self): | ||||||
|  |         # gh-126033: Check that a concurrent mutation for an assumed-to-be | ||||||
|  |         # missing element does not make the interpreter crash. | ||||||
|  |         self.do_test_remove_with_mutate_root(raises=True) | ||||||
|  | 
 | ||||||
|  |     def test_remove_with_mutate_root_assume_existing(self): | ||||||
|  |         # gh-126033: Check that a concurrent mutation for an assumed-to-be | ||||||
|  |         # existing element does not make the interpreter crash. | ||||||
|  |         self.do_test_remove_with_mutate_root(raises=False) | ||||||
|  | 
 | ||||||
|  |     def do_test_remove_with_mutate_root(self, *, raises): | ||||||
|  |         E = ET.Element | ||||||
|  | 
 | ||||||
|  |         class Z(E): | ||||||
|  |             def __eq__(self, o): | ||||||
|  |                 del root[0] | ||||||
|  |                 return not raises | ||||||
|  | 
 | ||||||
|  |         if raises: | ||||||
|  |             get_checker_context = lambda: self.assertRaises(ValueError) | ||||||
|  |         else: | ||||||
|  |             get_checker_context = nullcontext | ||||||
|  | 
 | ||||||
|  |         # test removing R() from [U(), V()] | ||||||
|  |         cases = self.cases_for_remove_missing_with_mutations(E, Z) | ||||||
|  |         for R, U, V, description in cases: | ||||||
|  |             with self.subTest(description): | ||||||
|  |                 root = E('top') | ||||||
|  |                 root.extend([U('one'), V('two')]) | ||||||
|  |                 with get_checker_context(): | ||||||
|  |                     root.remove(R('missing')) | ||||||
|  | 
 | ||||||
|  |         # test removing root[1] (of type R) from [U(), R()] | ||||||
|  |         cases = self.cases_for_remove_existing_with_mutations(E, Z) | ||||||
|  |         for R, U, description in cases: | ||||||
|  |             with self.subTest(description): | ||||||
|  |                 root = E('top') | ||||||
|  |                 root.extend([U('one'), R('rem')]) | ||||||
|  |                 with get_checker_context(): | ||||||
|  |                     root.remove(root[1]) | ||||||
|  | 
 | ||||||
|  |     def cases_for_remove_missing_with_mutations(self, E, Z): | ||||||
|  |         # Cases for removing R() from [U(), V()]. | ||||||
|  |         # The case U = V = R = E is not interesting as there is no mutation. | ||||||
|  |         for U, V in [(E, Z), (Z, E), (Z, Z)]: | ||||||
|  |             description = (f"remove missing {E.__name__}() from " | ||||||
|  |                            f"[{U.__name__}(), {V.__name__}()]") | ||||||
|  |             yield E, U, V, description | ||||||
|  | 
 | ||||||
|  |         for U, V in [(E, E), (E, Z), (Z, E), (Z, Z)]: | ||||||
|  |             description = (f"remove missing {Z.__name__}() from " | ||||||
|  |                            f"[{U.__name__}(), {V.__name__}()]") | ||||||
|  |             yield Z, U, V, description | ||||||
|  | 
 | ||||||
|  |     def cases_for_remove_existing_with_mutations(self, E, Z): | ||||||
|  |         # Cases for removing root[1] (of type R) from [U(), R()]. | ||||||
|  |         # The case U = R = E is not interesting as there is no mutation. | ||||||
|  |         for U, R, description in [ | ||||||
|  |             (E, Z, "remove root[1] from [E(), Z()]"), | ||||||
|  |             (Z, E, "remove root[1] from [Z(), E()]"), | ||||||
|  |             (Z, Z, "remove root[1] from [Z(), Z()]"), | ||||||
|  |         ]: | ||||||
|  |             description = (f"remove root[1] (of type {R.__name__}) " | ||||||
|  |                            f"from [{U.__name__}(), {R.__name__}()]") | ||||||
|  |             yield R, U, description | ||||||
| 
 | 
 | ||||||
|     @support.infinite_recursion(25) |     @support.infinite_recursion(25) | ||||||
|     def test_recursive_repr(self): |     def test_recursive_repr(self): | ||||||
|  |  | ||||||
|  | @ -0,0 +1,3 @@ | ||||||
|  | :mod:`xml.etree.ElementTree`: Fix a crash in :meth:`Element.remove | ||||||
|  | <xml.etree.ElementTree.Element.remove>` when the element is | ||||||
|  | concurrently mutated. Patch by Bénédikt Tran. | ||||||
|  | @ -850,6 +850,7 @@ _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo) | ||||||
|         if (element_resize(element, self->extra->length) < 0) |         if (element_resize(element, self->extra->length) < 0) | ||||||
|             goto error; |             goto error; | ||||||
| 
 | 
 | ||||||
|  |         // TODO(picnixz): check for an evil child's __deepcopy__ on 'self'
 | ||||||
|         for (i = 0; i < self->extra->length; i++) { |         for (i = 0; i < self->extra->length; i++) { | ||||||
|             PyObject* child = deepcopy(st, self->extra->children[i], memo); |             PyObject* child = deepcopy(st, self->extra->children[i], memo); | ||||||
|             if (!child || !Element_Check(st, child)) { |             if (!child || !Element_Check(st, child)) { | ||||||
|  | @ -1629,42 +1630,47 @@ _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement) | ||||||
| /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ | /*[clinic end generated code: output=38fe6c07d6d87d1f input=6133e1d05597d5ee]*/ | ||||||
| { | { | ||||||
|     Py_ssize_t i; |     Py_ssize_t i; | ||||||
|     int rc; |     // When iterating over the list of children, we need to check that the
 | ||||||
|     PyObject *found; |     // list is not cleared (self->extra != NULL) and that we are still within
 | ||||||
| 
 |     // the correct bounds (i < self->extra->length).
 | ||||||
|     if (!self->extra) { |     //
 | ||||||
|         /* element has no children, so raise exception */ |     // We deliberately avoid protecting against children lists that grow
 | ||||||
|         PyErr_SetString( |     // faster than the index since list objects do not protect against it.
 | ||||||
|             PyExc_ValueError, |     int rc = 0; | ||||||
|             "list.remove(x): x not in list" |     for (i = 0; self->extra && i < self->extra->length; i++) { | ||||||
|             ); |         if (self->extra->children[i] == subelement) { | ||||||
|         return NULL; |             rc = 1; | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     for (i = 0; i < self->extra->length; i++) { |  | ||||||
|         if (self->extra->children[i] == subelement) |  | ||||||
|             break; |             break; | ||||||
|         rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ); |         } | ||||||
|         if (rc > 0) |         PyObject *child = Py_NewRef(self->extra->children[i]); | ||||||
|             break; |         rc = PyObject_RichCompareBool(child, subelement, Py_EQ); | ||||||
|         if (rc < 0) |         Py_DECREF(child); | ||||||
|  |         if (rc < 0) { | ||||||
|             return NULL; |             return NULL; | ||||||
|  |         } | ||||||
|  |         else if (rc > 0) { | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (i >= self->extra->length) { |     if (rc == 0) { | ||||||
|         /* subelement is not in children, so raise exception */ |         PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list"); | ||||||
|         PyErr_SetString( |  | ||||||
|             PyExc_ValueError, |  | ||||||
|             "list.remove(x): x not in list" |  | ||||||
|             ); |  | ||||||
|         return NULL; |         return NULL; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     found = self->extra->children[i]; |     // An extra check must be done if the mutation occurs at the very last
 | ||||||
|  |     // step and removes or clears the 'extra' list (the condition on the
 | ||||||
|  |     // length would not be satisfied any more).
 | ||||||
|  |     if (self->extra == NULL || i >= self->extra->length) { | ||||||
|  |         Py_RETURN_NONE; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     PyObject *found = self->extra->children[i]; | ||||||
| 
 | 
 | ||||||
|     self->extra->length--; |     self->extra->length--; | ||||||
|     for (; i < self->extra->length; i++) |     for (; i < self->extra->length; i++) { | ||||||
|         self->extra->children[i] = self->extra->children[i+1]; |         self->extra->children[i] = self->extra->children[i+1]; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     Py_DECREF(found); |     Py_DECREF(found); | ||||||
|     Py_RETURN_NONE; |     Py_RETURN_NONE; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Bénédikt Tran
						Bénédikt Tran