GH-140638: Add a GC "candidates" stat (GH-141814)

This commit is contained in:
Brandt Bucher 2025-11-22 13:59:14 -08:00 committed by GitHub
parent 425fd85ca3
commit 227b9d326e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 50 additions and 20 deletions

View file

@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head)
/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and the
* PREV_MASK_COLLECTING bit is set for all objects in containers.
*/
static void
static Py_ssize_t
update_refs(PyGC_Head *containers)
{
PyGC_Head *next;
PyGC_Head *gc = GC_NEXT(containers);
Py_ssize_t candidates = 0;
while (gc != containers) {
next = GC_NEXT(gc);
@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers)
*/
_PyObject_ASSERT(op, gc_get_refs(gc) != 0);
gc = next;
candidates++;
}
return candidates;
}
/* A traversal callback for subtract_refs. */
@ -1240,7 +1243,7 @@ flag set but it does not clear it to skip unnecessary iteration. Before the
flag is cleared (for example, by using 'clear_unreachable_mask' function or
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
list and we cannot use most gc_list_* functions for it. */
static inline void
static inline Py_ssize_t
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
validate_list(base, collecting_clear_unreachable_clear);
/* Using ob_refcnt and gc_refs, calculate which objects in the
@ -1248,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
* refcount greater than 0 when all the references within the
* set are taken into account).
*/
update_refs(base); // gc_prev is used for gc_refs
Py_ssize_t candidates = update_refs(base); // gc_prev is used for gc_refs
subtract_refs(base);
/* Leave everything reachable from outside base in base, and move
@ -1289,6 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
move_unreachable(base, unreachable); // gc_prev is pointer again
validate_list(base, collecting_clear_unreachable_clear);
validate_list(unreachable, collecting_set_unreachable_set);
return candidates;
}
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
@ -1366,6 +1370,7 @@ add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
gcstate->generation_stats[gen].duration += stats->duration;
gcstate->generation_stats[gen].collected += stats->collected;
gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
gcstate->generation_stats[gen].candidates += stats->candidates;
gcstate->generation_stats[gen].collections += 1;
}
@ -1662,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
Py_ssize_t objects_marked = mark_at_start(tstate);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
gcstate->work_to_do -= objects_marked;
stats->candidates += objects_marked;
validate_spaces(gcstate);
return;
}
@ -1754,7 +1760,7 @@ gc_collect_region(PyThreadState *tstate,
assert(!_PyErr_Occurred(tstate));
gc_list_init(&unreachable);
deduce_unreachable(from, &unreachable);
stats->candidates = deduce_unreachable(from, &unreachable);
validate_consistent_old_space(from);
untrack_tuples(from);
@ -1844,10 +1850,11 @@ do_gc_callback(GCState *gcstate, const char *phase,
assert(PyList_CheckExact(gcstate->callbacks));
PyObject *info = NULL;
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
info = Py_BuildValue("{sisnsnsd}",
info = Py_BuildValue("{sisnsnsnsd}",
"generation", generation,
"collected", stats->collected,
"uncollectable", stats->uncollectable,
"candidates", stats->candidates,
"duration", stats->duration);
if (info == NULL) {
PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");

View file

@ -100,6 +100,7 @@ struct collection_state {
int skip_deferred_objects;
Py_ssize_t collected;
Py_ssize_t uncollectable;
Py_ssize_t candidates;
Py_ssize_t long_lived_total;
struct worklist unreachable;
struct worklist legacy_finalizers;
@ -975,15 +976,12 @@ static bool
update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
{
struct collection_state *state = (struct collection_state *)args;
PyObject *op = op_from_block(block, args, false);
if (op == NULL) {
return true;
}
if (gc_is_alive(op)) {
return true;
}
// Exclude immortal objects from garbage collection
if (_Py_IsImmortal(op)) {
op->ob_tid = 0;
@ -991,6 +989,11 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
gc_clear_unreachable(op);
return true;
}
// Marked objects count as candidates, immortals don't:
state->candidates++;
if (gc_is_alive(op)) {
return true;
}
Py_ssize_t refcount = Py_REFCNT(op);
if (_PyObject_HasDeferredRefcount(op)) {
@ -1911,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state)
static void
invoke_gc_callback(PyThreadState *tstate, const char *phase,
int generation, Py_ssize_t collected,
Py_ssize_t uncollectable, double duration)
Py_ssize_t uncollectable, Py_ssize_t candidates,
double duration)
{
assert(!_PyErr_Occurred(tstate));
@ -1925,10 +1929,11 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
assert(PyList_CheckExact(gcstate->callbacks));
PyObject *info = NULL;
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
info = Py_BuildValue("{sisnsnsd}",
info = Py_BuildValue("{sisnsnsnsd}",
"generation", generation,
"collected", collected,
"uncollectable", uncollectable,
"candidates", candidates,
"duration", duration);
if (info == NULL) {
PyErr_FormatUnraisable("Exception ignored while "
@ -2372,7 +2377,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
GC_STAT_ADD(generation, collections, 1);
if (reason != _Py_GC_REASON_SHUTDOWN) {
invoke_gc_callback(tstate, "start", generation, 0, 0, 0);
invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0);
}
if (gcstate->debug & _PyGC_DEBUG_STATS) {
@ -2427,6 +2432,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
stats->collected += m;
stats->uncollectable += n;
stats->duration += duration;
stats->candidates += state.candidates;
GC_STAT_ADD(generation, objects_collected, m);
#ifdef Py_STATS
@ -2445,7 +2451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
}
if (reason != _Py_GC_REASON_SHUTDOWN) {
invoke_gc_callback(tstate, "stop", generation, m, n, duration);
invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration);
}
assert(!_PyErr_Occurred(tstate));