mirror of
https://github.com/python/cpython.git
synced 2025-10-20 08:23:47 +00:00
GH-135552: Make the GC clear weakrefs later (GH-136189)
Fix a bug caused by the garbage collector clearing weakrefs too early. The weakrefs in the ``tp_subclasses`` dictionary are needed in order to correctly invalidate type caches (for example, by calling ``PyType_Modified()``). Clearing weakrefs before calling finalizers causes the caches to not be correctly invalidated. That can cause crashes since the caches can refer to invalid objects. Defer the clearing of weakrefs without callbacks until after finalizers are executed.
This commit is contained in:
parent
deb385a143
commit
350c58ba4e
9 changed files with 317 additions and 140 deletions
|
@ -1492,22 +1492,49 @@ move_legacy_finalizer_reachable(struct collection_state *state)
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are
|
||||
// optionally enqueued in `wrcb_to_call`, but not invoked yet.
|
||||
// Weakrefs with callbacks are enqueued in `wrcb_to_call`, but not invoked
|
||||
// yet. Note that it's possible for such weakrefs to be outside the
|
||||
// unreachable set -- indeed, those are precisely the weakrefs whose callbacks
|
||||
// must be invoked. See gc_weakref.txt for overview & some details.
|
||||
//
|
||||
// The clearing of weakrefs is suble and must be done carefully, as there was
|
||||
// previous bugs related to this. First, weakrefs to the unreachable set of
|
||||
// objects must be cleared before we start calling `tp_clear`. If we don't,
|
||||
// those weakrefs can reveal unreachable objects to Python-level code and that
|
||||
// is not safe. Many objects are not usable after `tp_clear` has been called
|
||||
// and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
|
||||
// example bugs).
|
||||
//
|
||||
// Weakrefs with callbacks always need to be cleared before executing the
|
||||
// callback. That's because sometimes a callback will call the ref object,
|
||||
// to check if the reference is actually dead (KeyedRef does this, for
|
||||
// example). We want to indicate that it is dead, even though it is possible
|
||||
// a finalizer might resurrect it. Clearing also prevents the callback from
|
||||
// executing more than once.
|
||||
//
|
||||
// Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
|
||||
// calling finalizers. However, since tp_subclasses started being necessary
|
||||
// to invalidate caches (e.g. by PyType_Modified), that clearing has created
|
||||
// a bug. If the weakref to the subclass is cleared before a finalizer is
|
||||
// called, the cache may not be correctly invalidated. That can lead to
|
||||
// segfaults since the caches can refer to deallocated objects (GH-91636
|
||||
// is an example). Now, we delay the clear of weakrefs without callbacks
|
||||
// until *after* finalizers have been executed. That means weakrefs without
|
||||
// callbacks are still usable while finalizers are being executed.
|
||||
//
|
||||
// The weakrefs that are inside the unreachable set must also be cleared.
|
||||
// The reason this is required is not immediately obvious. If the weakref
|
||||
// refers to an object outside of the unreachable set, that object might go
|
||||
// away when we start clearing objects. Normally, the object should also be
|
||||
// part of the unreachable set but that's not true in the case of incomplete
|
||||
// or missing `tp_traverse` methods. When that object goes away, the callback
|
||||
// for weakref can be executed and that could reveal unreachable objects to
|
||||
// Python-level code. See bpo-38006 as an example bug.
|
||||
static void
|
||||
clear_weakrefs(struct collection_state *state, bool enqueue_callbacks)
|
||||
find_weakref_callbacks(struct collection_state *state)
|
||||
{
|
||||
PyObject *op;
|
||||
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
||||
if (PyWeakref_Check(op)) {
|
||||
// Clear weakrefs that are themselves unreachable to ensure their
|
||||
// callbacks will not be executed later from a `tp_clear()`
|
||||
// inside delete_garbage(). That would be unsafe: it could
|
||||
// resurrect a dead object or access a an already cleared object.
|
||||
// See bpo-38006 for one example.
|
||||
_PyWeakref_ClearRef((PyWeakReference *)op);
|
||||
}
|
||||
|
||||
if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
|
||||
continue;
|
||||
}
|
||||
|
@ -1519,16 +1546,21 @@ clear_weakrefs(struct collection_state *state, bool enqueue_callbacks)
|
|||
// `op` may have some weakrefs. March over the list, clear
|
||||
// all the weakrefs, and enqueue the weakrefs with callbacks
|
||||
// that must be called into wrcb_to_call.
|
||||
for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
|
||||
// _PyWeakref_ClearRef clears the weakref but leaves
|
||||
// the callback pointer intact. Obscure: it also
|
||||
// changes *wrlist.
|
||||
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
|
||||
_PyWeakref_ClearRef(wr);
|
||||
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
|
||||
PyWeakReference *next_wr;
|
||||
for (PyWeakReference *wr = *wrlist; wr != NULL; wr = next_wr) {
|
||||
// Get the next list element to get iterator progress if we omit
|
||||
// clearing of the weakref (because _PyWeakref_ClearRef changes
|
||||
// next pointer in the wrlist).
|
||||
next_wr = wr->wr_next;
|
||||
|
||||
if (!enqueue_callbacks) {
|
||||
continue;
|
||||
// Weakrefs with callbacks always need to be cleared before
|
||||
// executing the callback.
|
||||
if (wr->wr_callback != NULL) {
|
||||
// _PyWeakref_ClearRef clears the weakref but leaves the
|
||||
// callback pointer intact. Obscure: it also changes *wrlist.
|
||||
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
|
||||
_PyWeakref_ClearRef(wr);
|
||||
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
|
||||
}
|
||||
|
||||
// We do not invoke callbacks for weakrefs that are themselves
|
||||
|
@ -1550,6 +1582,39 @@ clear_weakrefs(struct collection_state *state, bool enqueue_callbacks)
|
|||
}
|
||||
}
|
||||
|
||||
// Clear weakrefs to objects in the unreachable set. See comments
|
||||
// above find_weakref_callbacks() for why this clearing is required.
|
||||
static void
|
||||
clear_weakrefs(struct collection_state *state)
|
||||
{
|
||||
PyObject *op;
|
||||
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
||||
if (PyWeakref_Check(op)) {
|
||||
// Clear weakrefs that are themselves unreachable.
|
||||
_PyWeakref_ClearRef((PyWeakReference *)op);
|
||||
}
|
||||
|
||||
if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// NOTE: This is never triggered for static types so we can avoid the
|
||||
// (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
|
||||
PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
|
||||
|
||||
// `op` may have some weakrefs. March over the list, clear
|
||||
// all the weakrefs.
|
||||
for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
|
||||
// _PyWeakref_ClearRef clears the weakref but leaves
|
||||
// the callback pointer intact. Obscure: it also
|
||||
// changes *wrlist.
|
||||
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
|
||||
_PyWeakref_ClearRef(wr);
|
||||
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
call_weakref_callbacks(struct collection_state *state)
|
||||
{
|
||||
|
@ -2222,8 +2287,8 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
|
|||
// Record the number of live GC objects
|
||||
interp->gc.long_lived_total = state->long_lived_total;
|
||||
|
||||
// Clear weakrefs and enqueue callbacks (but do not call them).
|
||||
clear_weakrefs(state, true);
|
||||
// Find weakref callbacks we will honor (but do not call them).
|
||||
find_weakref_callbacks(state);
|
||||
_PyEval_StartTheWorld(interp);
|
||||
|
||||
// Deallocate any object from the refcount merge step
|
||||
|
@ -2240,12 +2305,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
|
|||
// Clear free lists in all threads
|
||||
_PyGC_ClearAllFreeLists(interp);
|
||||
if (err == 0) {
|
||||
// Clear weakrefs to objects in the unreachable set. No Python-level
|
||||
// code must be allowed to access those unreachable objects. During
|
||||
// delete_garbage(), finalizers outside the unreachable set might
|
||||
// run and create new weakrefs. If those weakrefs were not cleared,
|
||||
// they could reveal unreachable objects.
|
||||
clear_weakrefs(state, false);
|
||||
clear_weakrefs(state);
|
||||
}
|
||||
_PyEval_StartTheWorld(interp);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue