mirror of
https://github.com/python/cpython.git
synced 2026-04-20 02:40:59 +00:00
gh-142831: Fix use-after-free in json encoder during re-entrant mutation (gh-142851)
Hold strong references to borrowed items unconditionally (not only in
free-threading builds) in _encoder_iterate_mapping_lock_held and
_encoder_iterate_fast_seq_lock_held. User callbacks invoked during
encoding can mutate or clear the underlying container, invalidating
borrowed references.
The dict iteration path was already fixed by gh-145244.
Co-authored-by: Kumar Aditya <kumaraditya@python.org>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
parent
d761f539bd
commit
235fa7244a
3 changed files with 69 additions and 22 deletions
|
|
@ -1,4 +1,5 @@
|
|||
from test.test_json import CTest
|
||||
from test.support import gc_collect
|
||||
|
||||
|
||||
class BadBool:
|
||||
|
|
@ -111,3 +112,63 @@ def test_current_indent_level(self):
|
|||
self.assertEqual(enc(['spam', {'ham': 'eggs'}], 3)[0], expected2)
|
||||
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}], 3.0)
|
||||
self.assertRaises(TypeError, enc, ['spam', {'ham': 'eggs'}])
|
||||
|
||||
def test_mutate_dict_items_during_encode(self):
|
||||
# gh-142831: Clearing the items list via a re-entrant key encoder
|
||||
# must not cause a use-after-free. BadDict.items() returns a
|
||||
# mutable list; encode_str clears it while iterating.
|
||||
items = None
|
||||
|
||||
class BadDict(dict):
|
||||
def items(self):
|
||||
nonlocal items
|
||||
items = [("boom", object())]
|
||||
return items
|
||||
|
||||
cleared = False
|
||||
def encode_str(obj):
|
||||
nonlocal items, cleared
|
||||
if items is not None:
|
||||
items.clear()
|
||||
items = None
|
||||
cleared = True
|
||||
gc_collect()
|
||||
return '"x"'
|
||||
|
||||
encoder = self.json.encoder.c_make_encoder(
|
||||
None, lambda o: "null",
|
||||
encode_str, None,
|
||||
": ", ", ", False,
|
||||
False, True
|
||||
)
|
||||
|
||||
# Must not crash (use-after-free under ASan before fix)
|
||||
encoder(BadDict(real=1), 0)
|
||||
self.assertTrue(cleared)
|
||||
|
||||
def test_mutate_list_during_encode(self):
|
||||
# gh-142831: Clearing a list mid-iteration via the default
|
||||
# callback must not cause a use-after-free.
|
||||
call_count = 0
|
||||
lst = [object() for _ in range(10)]
|
||||
|
||||
def default(obj):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 3:
|
||||
lst.clear()
|
||||
gc_collect()
|
||||
return None
|
||||
|
||||
encoder = self.json.encoder.c_make_encoder(
|
||||
None, default,
|
||||
self.json.encoder.c_encode_basestring, None,
|
||||
": ", ", ", False,
|
||||
False, True
|
||||
)
|
||||
|
||||
# Must not crash (use-after-free under ASan before fix)
|
||||
encoder(lst, 0)
|
||||
# Verify the mutation path was actually hit and the loop
|
||||
# stopped iterating after the list was cleared.
|
||||
self.assertEqual(call_count, 3)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
Fix a crash in the :mod:`json` module where a use-after-free could occur if
|
||||
the object being encoded is modified during serialization.
|
||||
|
|
@ -1745,15 +1745,12 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
PyObject *key, *value;
|
||||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
|
||||
PyObject *item = PyList_GET_ITEM(items, i);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// gh-119438: in the free-threading build the critical section on items can get suspended
|
||||
// gh-142831: encoder_encode_key_value() can invoke user code
|
||||
// that mutates the items list, invalidating this borrowed ref.
|
||||
Py_INCREF(item);
|
||||
#endif
|
||||
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
|
||||
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_DECREF(item);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
@ -1762,14 +1759,10 @@ _encoder_iterate_mapping_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
if (encoder_encode_key_value(s, writer, first, dct, key, value,
|
||||
indent_level, indent_cache,
|
||||
separator) < 0) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_DECREF(item);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_DECREF(item);
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1784,10 +1777,8 @@ _encoder_iterate_dict_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
PyObject *key, *value;
|
||||
Py_ssize_t pos = 0;
|
||||
while (PyDict_Next(dct, &pos, &key, &value)) {
|
||||
// gh-119438, gh-145244: key and value are borrowed refs from
|
||||
// PyDict_Next(). encoder_encode_key_value() may invoke user
|
||||
// Python code (the 'default' callback) that can mutate or
|
||||
// clear the dict, so we must hold strong references.
|
||||
// gh-145244: encoder_encode_key_value() can invoke user code
|
||||
// that mutates the dict, invalidating these borrowed refs.
|
||||
Py_INCREF(key);
|
||||
Py_INCREF(value);
|
||||
if (encoder_encode_key_value(s, writer, first, dct, key, value,
|
||||
|
|
@ -1902,28 +1893,21 @@ _encoder_iterate_fast_seq_lock_held(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
{
|
||||
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
|
||||
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// gh-119438: in the free-threading build the critical section on s_fast can get suspended
|
||||
// gh-142831: encoder_listencode_obj() can invoke user code
|
||||
// that mutates the sequence, invalidating this borrowed ref.
|
||||
Py_INCREF(obj);
|
||||
#endif
|
||||
if (i) {
|
||||
if (PyUnicodeWriter_WriteStr(writer, separator) < 0) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_DECREF(obj);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (encoder_listencode_obj(s, writer, obj, indent_level, indent_cache)) {
|
||||
_PyErr_FormatNote("when serializing %T item %zd", seq, i);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_DECREF(obj);
|
||||
#endif
|
||||
return -1;
|
||||
}
|
||||
#ifdef Py_GIL_DISABLED
|
||||
Py_DECREF(obj);
|
||||
#endif
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue