GH-140638: Add a GC "candidates" stat (GH-141814)

This commit is contained in:
Brandt Bucher 2025-11-22 13:59:14 -08:00 committed by GitHub
parent 425fd85ca3
commit 227b9d326e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 50 additions and 20 deletions

View file

@ -110,13 +110,16 @@ The :mod:`gc` module provides the following functions:
to be uncollectable (and were therefore moved to the :data:`garbage`
list) inside this generation;
* ``candidates`` is the total number of objects in this generation which were
considered for collection and traversed;
* ``duration`` is the total time in seconds spent in collections for this
generation.
.. versionadded:: 3.4
.. versionchanged:: next
Add ``duration``.
Add ``duration`` and ``candidates``.
.. function:: set_threshold(threshold0, [threshold1, [threshold2]])
@ -319,6 +322,9 @@ values but should not rebind them):
"uncollectable": When *phase* is "stop", the number of objects
that could not be collected and were put in :data:`garbage`.
"candidates": When *phase* is "stop", the total number of objects in this
generation which were considered for collection and traversed.
"duration": When *phase* is "stop", the time in seconds spent in the
collection.
@ -335,7 +341,7 @@ values but should not rebind them):
.. versionadded:: 3.3
.. versionchanged:: next
Add "duration".
Add "duration" and "candidates".
The following constants are provided for use with :func:`set_debug`:

View file

@ -179,6 +179,8 @@ struct gc_collection_stats {
Py_ssize_t collected;
/* total number of uncollectable objects (put into gc.garbage) */
Py_ssize_t uncollectable;
// Total number of objects considered for collection and traversed:
Py_ssize_t candidates;
// Duration of the collection in seconds:
double duration;
};
@ -191,6 +193,8 @@ struct gc_generation_stats {
Py_ssize_t collected;
/* total number of uncollectable objects (put into gc.garbage) */
Py_ssize_t uncollectable;
// Total number of objects considered for collection and traversed:
Py_ssize_t candidates;
// Duration of the collection in seconds:
double duration;
};

View file

@ -846,11 +846,14 @@ def test_get_stats(self):
self.assertEqual(len(stats), 3)
for st in stats:
self.assertIsInstance(st, dict)
self.assertEqual(set(st),
{"collected", "collections", "uncollectable", "duration"})
self.assertEqual(
set(st),
{"collected", "collections", "uncollectable", "candidates", "duration"}
)
self.assertGreaterEqual(st["collected"], 0)
self.assertGreaterEqual(st["collections"], 0)
self.assertGreaterEqual(st["uncollectable"], 0)
self.assertGreaterEqual(st["candidates"], 0)
self.assertGreaterEqual(st["duration"], 0)
# Check that collection counts are incremented correctly
if gc.isenabled():
@ -865,7 +868,7 @@ def test_get_stats(self):
self.assertGreater(new[0]["duration"], old[0]["duration"])
self.assertEqual(new[1]["duration"], old[1]["duration"])
self.assertEqual(new[2]["duration"], old[2]["duration"])
for stat in ["collected", "uncollectable"]:
for stat in ["collected", "uncollectable", "candidates"]:
self.assertGreaterEqual(new[0][stat], old[0][stat])
self.assertEqual(new[1][stat], old[1][stat])
self.assertEqual(new[2][stat], old[2][stat])
@ -877,7 +880,7 @@ def test_get_stats(self):
self.assertEqual(new[0]["duration"], old[0]["duration"])
self.assertEqual(new[1]["duration"], old[1]["duration"])
self.assertGreater(new[2]["duration"], old[2]["duration"])
for stat in ["collected", "uncollectable"]:
for stat in ["collected", "uncollectable", "candidates"]:
self.assertEqual(new[0][stat], old[0][stat])
self.assertEqual(new[1][stat], old[1][stat])
self.assertGreaterEqual(new[2][stat], old[2][stat])
@ -1316,6 +1319,7 @@ def test_collect(self):
self.assertIn("generation", info)
self.assertIn("collected", info)
self.assertIn("uncollectable", info)
self.assertIn("candidates", info)
self.assertIn("duration", info)
def test_collect_generation(self):

View file

@ -0,0 +1,2 @@
Expose a ``"candidates"`` stat in :func:`gc.get_stats` and
:data:`gc.callbacks`.

View file

@ -358,10 +358,11 @@ gc_get_stats_impl(PyObject *module)
for (i = 0; i < NUM_GENERATIONS; i++) {
PyObject *dict;
st = &stats[i];
dict = Py_BuildValue("{snsnsnsd}",
dict = Py_BuildValue("{snsnsnsnsd}",
"collections", st->collections,
"collected", st->collected,
"uncollectable", st->uncollectable,
"candidates", st->candidates,
"duration", st->duration
);
if (dict == NULL)

View file

@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head)
/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and
* PREV_MASK_COLLECTING bit is set for all objects in containers.
*/
static void
static Py_ssize_t
update_refs(PyGC_Head *containers)
{
PyGC_Head *next;
PyGC_Head *gc = GC_NEXT(containers);
Py_ssize_t candidates = 0;
while (gc != containers) {
next = GC_NEXT(gc);
@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers)
*/
_PyObject_ASSERT(op, gc_get_refs(gc) != 0);
gc = next;
candidates++;
}
return candidates;
}
/* A traversal callback for subtract_refs. */
@ -1240,7 +1243,7 @@ flag set but it does not clear it to skip unnecessary iteration. Before the
flag is cleared (for example, by using 'clear_unreachable_mask' function or
by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal
list and we can not use most gc_list_* functions for it. */
static inline void
static inline Py_ssize_t
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
validate_list(base, collecting_clear_unreachable_clear);
/* Using ob_refcnt and gc_refs, calculate which objects in the
@ -1248,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
* refcount greater than 0 when all the references within the
* set are taken into account).
*/
update_refs(base); // gc_prev is used for gc_refs
Py_ssize_t candidates = update_refs(base); // gc_prev is used for gc_refs
subtract_refs(base);
/* Leave everything reachable from outside base in base, and move
@ -1289,6 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
move_unreachable(base, unreachable); // gc_prev is pointer again
validate_list(base, collecting_clear_unreachable_clear);
validate_list(unreachable, collecting_set_unreachable_set);
return candidates;
}
/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving
@ -1366,6 +1370,7 @@ add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats)
gcstate->generation_stats[gen].duration += stats->duration;
gcstate->generation_stats[gen].collected += stats->collected;
gcstate->generation_stats[gen].uncollectable += stats->uncollectable;
gcstate->generation_stats[gen].candidates += stats->candidates;
gcstate->generation_stats[gen].collections += 1;
}
@ -1662,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
Py_ssize_t objects_marked = mark_at_start(tstate);
GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
gcstate->work_to_do -= objects_marked;
stats->candidates += objects_marked;
validate_spaces(gcstate);
return;
}
@ -1754,7 +1760,7 @@ gc_collect_region(PyThreadState *tstate,
assert(!_PyErr_Occurred(tstate));
gc_list_init(&unreachable);
deduce_unreachable(from, &unreachable);
stats->candidates = deduce_unreachable(from, &unreachable);
validate_consistent_old_space(from);
untrack_tuples(from);
@ -1844,10 +1850,11 @@ do_gc_callback(GCState *gcstate, const char *phase,
assert(PyList_CheckExact(gcstate->callbacks));
PyObject *info = NULL;
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
info = Py_BuildValue("{sisnsnsd}",
info = Py_BuildValue("{sisnsnsnsd}",
"generation", generation,
"collected", stats->collected,
"uncollectable", stats->uncollectable,
"candidates", stats->candidates,
"duration", stats->duration);
if (info == NULL) {
PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks");

View file

@ -100,6 +100,7 @@ struct collection_state {
int skip_deferred_objects;
Py_ssize_t collected;
Py_ssize_t uncollectable;
Py_ssize_t candidates;
Py_ssize_t long_lived_total;
struct worklist unreachable;
struct worklist legacy_finalizers;
@ -975,15 +976,12 @@ static bool
update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
void *block, size_t block_size, void *args)
{
struct collection_state *state = (struct collection_state *)args;
PyObject *op = op_from_block(block, args, false);
if (op == NULL) {
return true;
}
if (gc_is_alive(op)) {
return true;
}
// Exclude immortal objects from garbage collection
if (_Py_IsImmortal(op)) {
op->ob_tid = 0;
@ -991,6 +989,11 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
gc_clear_unreachable(op);
return true;
}
// Marked objects count as candidates, immortals don't:
state->candidates++;
if (gc_is_alive(op)) {
return true;
}
Py_ssize_t refcount = Py_REFCNT(op);
if (_PyObject_HasDeferredRefcount(op)) {
@ -1911,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state)
static void
invoke_gc_callback(PyThreadState *tstate, const char *phase,
int generation, Py_ssize_t collected,
Py_ssize_t uncollectable, double duration)
Py_ssize_t uncollectable, Py_ssize_t candidates,
double duration)
{
assert(!_PyErr_Occurred(tstate));
@ -1925,10 +1929,11 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase,
assert(PyList_CheckExact(gcstate->callbacks));
PyObject *info = NULL;
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
info = Py_BuildValue("{sisnsnsd}",
info = Py_BuildValue("{sisnsnsnsd}",
"generation", generation,
"collected", collected,
"uncollectable", uncollectable,
"candidates", candidates,
"duration", duration);
if (info == NULL) {
PyErr_FormatUnraisable("Exception ignored while "
@ -2372,7 +2377,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
GC_STAT_ADD(generation, collections, 1);
if (reason != _Py_GC_REASON_SHUTDOWN) {
invoke_gc_callback(tstate, "start", generation, 0, 0, 0);
invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0);
}
if (gcstate->debug & _PyGC_DEBUG_STATS) {
@ -2427,6 +2432,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
stats->collected += m;
stats->uncollectable += n;
stats->duration += duration;
stats->candidates += state.candidates;
GC_STAT_ADD(generation, objects_collected, m);
#ifdef Py_STATS
@ -2445,7 +2451,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
}
if (reason != _Py_GC_REASON_SHUTDOWN) {
invoke_gc_callback(tstate, "stop", generation, m, n, duration);
invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration);
}
assert(!_PyErr_Occurred(tstate));