gh-110481: Implement inter-thread queue for biased reference counting (#114824)

Biased reference counting maintains two refcount fields in each object:
`ob_ref_local` and `ob_ref_shared`. The true refcount is the sum of these two
fields. In some cases, when refcounting operations are split across threads,
the `ob_ref_shared` field can be negative (although the total refcount must be
at least zero). In this case, the thread that decremented the refcount
requests that the owning thread give up ownership and merge the refcount
fields.
Author: Sam Gross
Date:   2024-02-09 17:08:32 -05:00
Commit: a3af3cb4f4 (parent: a225520af9)
21 changed files with 418 additions and 11 deletions
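
For readers new to the scheme, the sketch below models the split refcount described in the message above. The struct, helper names, and memory orderings are illustrative assumptions rather than CPython's actual layout; only the field roles and the local-plus-shared arithmetic mirror the real `ob_ref_local`/`ob_ref_shared` pair.

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

// Illustrative object header: the owning thread updates ref_local with
// plain (non-atomic) operations; all other threads use atomic ref_shared.
typedef struct {
    uintptr_t tid;               // id of the owning thread
    uint32_t ref_local;          // owner-only refcount (fast path)
    _Atomic int32_t ref_shared;  // refcount contributed by other threads
} toy_obj;

// True refcount = ref_local + ref_shared. The shared field may dip below
// zero when non-owning threads decref more often than they incref.
static int64_t toy_refcount(toy_obj *op) {
    return (int64_t)op->ref_local +
           atomic_load_explicit(&op->ref_shared, memory_order_acquire);
}

int main(void) {
    toy_obj o = { .tid = 1, .ref_local = 2 };
    // A non-owning thread releases a reference: ref_shared becomes -1,
    // but the total refcount is still 1, so the object stays alive.
    atomic_fetch_sub_explicit(&o.ref_shared, 1, memory_order_release);
    printf("true refcount: %lld\n", (long long)toy_refcount(&o));
    return 0;
}
```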

@@ -1,5 +1,6 @@
 // Cyclic garbage collector implementation for free-threaded build.
 #include "Python.h"
+#include "pycore_brc.h" // struct _brc_thread_state
 #include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
 #include "pycore_context.h"
 #include "pycore_dict.h" // _PyDict_MaybeUntrack()
@@ -152,8 +153,7 @@ gc_decref(PyObject *op)
     op->ob_tid -= 1;
 }
 
 // Merge refcounts while the world is stopped.
-static void
+static Py_ssize_t
 merge_refcount(PyObject *op, Py_ssize_t extra)
 {
     assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
@@ -169,6 +169,7 @@ merge_refcount(PyObject *op, Py_ssize_t extra)
     op->ob_tid = 0;
     op->ob_ref_local = 0;
     op->ob_ref_shared = _Py_REF_SHARED(refcount, _Py_REF_MERGED);
+    return refcount;
 }
 
 static void
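
The change of `merge_refcount()` from `static void` to `static Py_ssize_t` above makes the merge report the resulting true refcount. Here is a simplified model of that arithmetic; the two-bit flag encoding is an assumption standing in for the real `_Py_REF_SHARED`/`_Py_REF_MERGED` macros, and `toy_obj` reuses the illustrative layout from the first sketch (with a plain `int32_t`, since the world is stopped and no atomics are needed):

```c
#include <stdint.h>

#define REF_SHARED_SHIFT 2                        // low bits hold state flags
#define REF_MERGED       0x3                      // assumed "merged" encoding
#define REF_SHARED(n, f) (((n) << REF_SHARED_SHIFT) + (f))

typedef struct {
    uintptr_t tid;
    uint32_t ref_local;
    int32_t ref_shared;  // packed: (count << REF_SHARED_SHIFT) | flags
} toy_obj;

// Mirrors the patched merge_refcount(): fold both fields plus a caller
// adjustment into the shared field, mark the object merged (it is never
// biased toward an owner again), and return the resulting true refcount.
static intptr_t toy_merge_refcount(toy_obj *op, intptr_t extra) {
    intptr_t refcount = (intptr_t)op->ref_local
                        + (op->ref_shared >> REF_SHARED_SHIFT)
                        + extra;
    op->tid = 0;         // no owning thread anymore
    op->ref_local = 0;
    op->ref_shared = (int32_t)REF_SHARED(refcount, REF_MERGED);
    return refcount;
}
```

Returning the count is what lets `merge_queued_objects()`, added in the next hunk, spot objects whose true refcount has already reached zero.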
@@ -282,6 +283,41 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor,
     return err;
 }
 
+static void
+merge_queued_objects(_PyThreadStateImpl *tstate, struct collection_state *state)
+{
+    struct _brc_thread_state *brc = &tstate->brc;
+    _PyObjectStack_Merge(&brc->local_objects_to_merge, &brc->objects_to_merge);
+
+    PyObject *op;
+    while ((op = _PyObjectStack_Pop(&brc->local_objects_to_merge)) != NULL) {
+        // Subtract one when merging because the queue had a reference.
+        Py_ssize_t refcount = merge_refcount(op, -1);
+
+        if (!_PyObject_GC_IS_TRACKED(op) && refcount == 0) {
+            // GC objects with zero refcount are handled subsequently by the
+            // GC as if they were cyclic trash, but we have to handle dead
+            // non-GC objects here. Add one to the refcount so that we can
+            // decref and deallocate the object once we start the world again.
+            op->ob_ref_shared += (1 << _Py_REF_SHARED_SHIFT);
+#ifdef Py_REF_DEBUG
+            _Py_IncRefTotal(_PyInterpreterState_GET());
+#endif
+            worklist_push(&state->objs_to_decref, op);
+        }
+    }
+}
+
+static void
+merge_all_queued_objects(PyInterpreterState *interp, struct collection_state *state)
+{
+    HEAD_LOCK(&_PyRuntime);
+    for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
+        merge_queued_objects((_PyThreadStateImpl *)p, state);
+    }
+    HEAD_UNLOCK(&_PyRuntime);
+}
+
 // Subtract an incoming reference from the computed "gc_refs" refcount.
 static int
 visit_decref(PyObject *op, void *arg)
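
The inter-thread queue of the commit title is per thread state: when a non-owning thread's decref drives `ob_ref_shared` negative, it pushes the object onto the owning thread's `objects_to_merge` stack, and the owner (or, during collection, the code above) merges the refcount fields later. A minimal sketch of that handoff, using hypothetical names and a plain mutex in place of the real `_PyObjectStack` and the locking around `struct _brc_thread_state`:

```c
#include <pthread.h>
#include <stddef.h>

typedef struct qnode { struct qnode *next; void *obj; } qnode;

typedef struct {
    pthread_mutex_t mutex;    // guards objects_to_merge
    qnode *objects_to_merge;  // pushed onto by *other* threads
} toy_brc_state;

// Non-owner side: the decref that drove the shared count negative hands
// the object to its owner instead of freeing it.
static void toy_queue_object(toy_brc_state *owner, qnode *n) {
    pthread_mutex_lock(&owner->mutex);
    n->next = owner->objects_to_merge;
    owner->objects_to_merge = n;
    pthread_mutex_unlock(&owner->mutex);
}

// Owner side: detach the whole stack in O(1) under the lock, then process
// it without the lock held, as _PyObjectStack_Merge/_PyObjectStack_Pop
// do in the hunk above.
static qnode *toy_take_queued(toy_brc_state *self) {
    pthread_mutex_lock(&self->mutex);
    qnode *head = self->objects_to_merge;
    self->objects_to_merge = NULL;
    pthread_mutex_unlock(&self->mutex);
    return head;
}
```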
@@ -927,6 +963,9 @@ static void
 gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
 {
     _PyEval_StopTheWorld(interp);
+    // merge refcounts for all queued objects
+    merge_all_queued_objects(interp, state);
+
     // Find unreachable objects
     int err = deduce_unreachable_heap(interp, state);
     if (err < 0) {
@@ -946,6 +985,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
     clear_weakrefs(state);
     _PyEval_StartTheWorld(interp);
+
+    // Deallocate any object from the refcount merge step
+    cleanup_worklist(&state->objs_to_decref);
 
     // Call weakref callbacks and finalizers after unpausing other threads to
     // avoid potential deadlocks.
     call_weakref_callbacks(state);
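
Note the ordering across the last two hunks: refcounts are merged while the world is stopped, but the resulting decrefs are deferred until after `_PyEval_StartTheWorld()`, because `tp_dealloc` can run arbitrary code and, as the comment about weakref callbacks notes, running it while every other thread is paused risks deadlock. A sketch of the deferred drain, with a hypothetical linked-list worklist standing in for the real `struct worklist`:

```c
#include <Python.h>

// Hypothetical stand-in for the objs_to_decref worklist.
typedef struct wl_node { struct wl_node *next; PyObject *op; } wl_node;

// Run only after the world is restarted: Py_DECREF may invoke tp_dealloc,
// which must not execute while all other threads are suspended.
static void toy_cleanup_worklist(wl_node **head) {
    while (*head != NULL) {
        wl_node *n = *head;
        *head = n->next;
        Py_DECREF(n->op);  // may deallocate the object
        PyMem_Free(n);
    }
}
```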