gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)

Hold strong references to borrowed items unconditionally (not only in                                                                        
free-threading builds) in _encoder_iterate_mapping_lock_held and                                                                             
_encoder_iterate_fast_seq_lock_held.  User callbacks invoked during                                                                          
encoding can mutate or clear the underlying container, invalidating                                                                          
borrowed references.                                                                                                                         
                                                                                                                                               
The dict iteration path was already fixed by gh-145244.

Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Shamil 2026-04-12 03:14:50 +03:00 committed by GitHub
parent d761f539bd
commit 235fa7244a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 69 additions and 22 deletions

View file

@ -1,4 +1,5 @@
from test.test_json import CTest
from test.support import gc_collect
class BadBool:
@ -111,3 +112,63 @@ def test_current_indent_level(self):
self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
def test_mutate_dict_items_during_encode(self):
# gh-142831: Clearing the items list via a re-entrant key encoder
# must not cause a use-after-free. BadDict.items() returns a
# mutable list; encode_str clears it while iterating.
items = None
class BadDict(dict):
def items(self):
nonlocal items
items = [("boom", object())]
return items
cleared = False
def encode_str(obj):
nonlocal items, cleared
if items is not None:
items.clear()
items = None
cleared = True
gc_collect()
return '"x"'
encoder = self.json.encoder.c_make_encoder(
None, lambda o: "null",
encode_str, None,
": ", ", ", False,
False, True
)
# Must not crash (use-after-free under ASan before fix)
encoder(BadDict(real=1), 0)
self.assertTrue(cleared)
def test_mutate_list_during_encode(self):
# gh-142831: Clearing a list mid-iteration via the default
# callback must not cause a use-after-free.
call_count = 0
lst = [object() for _ in range(10)]
def default(obj):
nonlocal call_count
call_count += 1
if call_count == 3:
lst.clear()
gc_collect()
return None
encoder = self.json.encoder.c_make_encoder(
None, default,
self.json.encoder.c_encode_basestring, None,
": ", ", ", False,
False, True
)
# Must not crash (use-after-free under ASan before fix)
encoder(lst, 0)
# Verify the mutation path was actually hit and the loop
# stopped iterating after the list was cleared.
self.assertEqual(call_count, 3)

View file

@ -0,0 +1,2 @@
Fix a crash in the :mod:`json` module where a use-after-free could occur if
the object being encoded is modified during serialization.

View file

@ -1745,15 +1745,12 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *key, *value;
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
PyObject *item = PyList_GET_ITEM(items, i);
#ifdef Py_GIL_DISABLED
// gh-119438: in the free-threading build the critical section on items can get suspended
// gh-142831: encoder_encode_key_value() can invoke user code
// that mutates the items list, invalidating this borrowed ref.
Py_INCREF(item);
#endif
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
#ifdef Py_GIL_DISABLED
Py_DECREF(item);
#endif
return -1;
}
@ -1762,14 +1759,10 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
if (encoder_encode_key_value(s, writer, first, dct, key, value,
indent_level, indent_cache,
separator) < 0) {
#ifdef Py_GIL_DISABLED
Py_DECREF(item);
#endif
return -1;
}
#ifdef Py_GIL_DISABLED
Py_DECREF(item);
#endif
}
return 0;
@ -1784,10 +1777,8 @@ _encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
PyObject *key, *value;
Py_ssize_t pos = 0;
while (PyDict_Next(dct, &pos, &key, &value)) {
// gh-119438, gh-145244: key and value are borrowed refs from
// PyDict_Next(). encoder_encode_key_value() may invoke user
// Python code (the 'default' callback) that can mutate or
// clear the dict, so we must hold strong references.
// gh-145244: encoder_encode_key_value() can invoke user code
// that mutates the dict, invalidating these borrowed refs.
Py_INCREF(key);
Py_INCREF(value);
if (encoder_encode_key_value(s, writer, first, dct, key, value,
@ -1902,28 +1893,21 @@ _encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
{
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
#ifdef Py_GIL_DISABLED
// gh-119438: in the free-threading build the critical section on s_fast can get suspended
// gh-142831: encoder_listencode_obj() can invoke user code
// that mutates the sequence, invalidating this borrowed ref.
Py_INCREF(obj);
#endif
if (i) {
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
#ifdef Py_GIL_DISABLED
Py_DECREF(obj);
#endif
return -1;
}
}
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
#ifdef Py_GIL_DISABLED
Py_DECREF(obj);
#endif
return -1;
}
#ifdef Py_GIL_DISABLED
Py_DECREF(obj);
#endif
}
return 0;
}