mirror of
https://github.com/python/cpython.git
synced 2025-10-24 02:13:49 +00:00

* gh-112529: Implement GC for free-threaded builds This implements a mark and sweep GC for the free-threaded builds of CPython. The implementation relies on mimalloc to find GC tracked objects (i.e., "containers").
1723 lines
49 KiB
C
1723 lines
49 KiB
C
// Cyclic garbage collector implementation for free-threaded build.
|
|
#include "Python.h"
|
|
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
|
|
#include "pycore_context.h"
|
|
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
|
|
#include "pycore_initconfig.h"
|
|
#include "pycore_interp.h" // PyInterpreterState.gc
|
|
#include "pycore_object.h"
|
|
#include "pycore_object_alloc.h" // _PyObject_MallocWithType()
|
|
#include "pycore_object_stack.h"
|
|
#include "pycore_pyerrors.h"
|
|
#include "pycore_pystate.h" // _PyThreadState_GET()
|
|
#include "pycore_tstate.h" // _PyThreadStateImpl
|
|
#include "pycore_weakref.h" // _PyWeakref_ClearRef()
|
|
#include "pydtrace.h"
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
|
|
typedef struct _gc_runtime_state GCState;
|
|
|
|
#ifdef Py_DEBUG
|
|
# define GC_DEBUG
|
|
#endif
|
|
|
|
// Automatically choose the generation that needs collecting.
|
|
#define GENERATION_AUTO (-1)
|
|
|
|
// A linked-list of objects using the `ob_tid` field as the next pointer.
|
|
struct worklist {
|
|
uintptr_t head;
|
|
};
|
|
|
|
struct worklist_iter {
|
|
uintptr_t *ptr; // pointer to current object
|
|
uintptr_t *next; // next value of ptr
|
|
};
|
|
|
|
struct visitor_args {
|
|
size_t offset; // offset of PyObject from start of block
|
|
};
|
|
|
|
// Per-collection state
|
|
struct collection_state {
|
|
struct visitor_args base;
|
|
PyInterpreterState *interp;
|
|
GCState *gcstate;
|
|
Py_ssize_t collected;
|
|
Py_ssize_t uncollectable;
|
|
struct worklist unreachable;
|
|
struct worklist legacy_finalizers;
|
|
struct worklist wrcb_to_call;
|
|
struct worklist objs_to_decref;
|
|
};
|
|
|
|
// iterate over a worklist
|
|
#define WORKSTACK_FOR_EACH(stack, op) \
|
|
for ((op) = (PyObject *)(stack)->head; (op) != NULL; (op) = (PyObject *)(op)->ob_tid)
|
|
|
|
// iterate over a worklist with support for removing the current object
|
|
#define WORKSTACK_FOR_EACH_ITER(stack, iter, op) \
|
|
for (worklist_iter_init((iter), &(stack)->head), (op) = (PyObject *)(*(iter)->ptr); \
|
|
(op) != NULL; \
|
|
worklist_iter_init((iter), (iter)->next), (op) = (PyObject *)(*(iter)->ptr))
|
|
|
|
static void
|
|
worklist_push(struct worklist *worklist, PyObject *op)
|
|
{
|
|
assert(op->ob_tid == 0);
|
|
op->ob_tid = worklist->head;
|
|
worklist->head = (uintptr_t)op;
|
|
}
|
|
|
|
static PyObject *
|
|
worklist_pop(struct worklist *worklist)
|
|
{
|
|
PyObject *op = (PyObject *)worklist->head;
|
|
if (op != NULL) {
|
|
worklist->head = op->ob_tid;
|
|
op->ob_tid = 0;
|
|
}
|
|
return op;
|
|
}
|
|
|
|
static void
|
|
worklist_iter_init(struct worklist_iter *iter, uintptr_t *next)
|
|
{
|
|
iter->ptr = next;
|
|
PyObject *op = (PyObject *)*(iter->ptr);
|
|
if (op) {
|
|
iter->next = &op->ob_tid;
|
|
}
|
|
}
|
|
|
|
static void
|
|
worklist_remove(struct worklist_iter *iter)
|
|
{
|
|
PyObject *op = (PyObject *)*(iter->ptr);
|
|
*(iter->ptr) = op->ob_tid;
|
|
op->ob_tid = 0;
|
|
iter->next = iter->ptr;
|
|
}
|
|
|
|
static inline int
|
|
gc_is_unreachable(PyObject *op)
|
|
{
|
|
return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0;
|
|
}
|
|
|
|
static void
|
|
gc_set_unreachable(PyObject *op)
|
|
{
|
|
op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE;
|
|
}
|
|
|
|
static void
|
|
gc_clear_unreachable(PyObject *op)
|
|
{
|
|
op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE;
|
|
}
|
|
|
|
// Initialize the `ob_tid` field to zero if the object is not already
|
|
// initialized as unreachable.
|
|
static void
|
|
gc_maybe_init_refs(PyObject *op)
|
|
{
|
|
if (!gc_is_unreachable(op)) {
|
|
gc_set_unreachable(op);
|
|
op->ob_tid = 0;
|
|
}
|
|
}
|
|
|
|
static inline Py_ssize_t
|
|
gc_get_refs(PyObject *op)
|
|
{
|
|
return (Py_ssize_t)op->ob_tid;
|
|
}
|
|
|
|
static inline void
|
|
gc_add_refs(PyObject *op, Py_ssize_t refs)
|
|
{
|
|
assert(_PyObject_GC_IS_TRACKED(op));
|
|
op->ob_tid += refs;
|
|
}
|
|
|
|
static inline void
|
|
gc_decref(PyObject *op)
|
|
{
|
|
op->ob_tid -= 1;
|
|
}
|
|
|
|
// Merge refcounts while the world is stopped.
|
|
static void
|
|
merge_refcount(PyObject *op, Py_ssize_t extra)
|
|
{
|
|
assert(_PyInterpreterState_GET()->stoptheworld.world_stopped);
|
|
|
|
Py_ssize_t refcount = Py_REFCNT(op);
|
|
refcount += extra;
|
|
|
|
#ifdef Py_REF_DEBUG
|
|
_Py_AddRefTotal(_PyInterpreterState_GET(), extra);
|
|
#endif
|
|
|
|
// No atomics necessary; all other threads in this interpreter are paused.
|
|
op->ob_tid = 0;
|
|
op->ob_ref_local = 0;
|
|
op->ob_ref_shared = _Py_REF_SHARED(refcount, _Py_REF_MERGED);
|
|
}
|
|
|
|
static void
|
|
gc_restore_tid(PyObject *op)
|
|
{
|
|
mi_segment_t *segment = _mi_ptr_segment(op);
|
|
if (_Py_REF_IS_MERGED(op->ob_ref_shared)) {
|
|
op->ob_tid = 0;
|
|
}
|
|
else {
|
|
// NOTE: may change ob_tid if the object was re-initialized by
|
|
// a different thread or its segment was abandoned and reclaimed.
|
|
// The segment thread id might be zero, in which case we should
|
|
// ensure the refcounts are now merged.
|
|
op->ob_tid = segment->thread_id;
|
|
if (op->ob_tid == 0) {
|
|
merge_refcount(op, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
gc_restore_refs(PyObject *op)
|
|
{
|
|
if (gc_is_unreachable(op)) {
|
|
gc_restore_tid(op);
|
|
gc_clear_unreachable(op);
|
|
}
|
|
}
|
|
|
|
// Given a mimalloc memory block return the PyObject stored in it or NULL if
|
|
// the block is not allocated or the object is not tracked or is immortal.
|
|
static PyObject *
|
|
op_from_block(void *block, void *arg, bool include_frozen)
|
|
{
|
|
struct visitor_args *a = arg;
|
|
if (block == NULL) {
|
|
return NULL;
|
|
}
|
|
PyObject *op = (PyObject *)((char*)block + a->offset);
|
|
assert(PyObject_IS_GC(op));
|
|
if (!_PyObject_GC_IS_TRACKED(op)) {
|
|
return NULL;
|
|
}
|
|
if (!include_frozen && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) {
|
|
return NULL;
|
|
}
|
|
return op;
|
|
}
|
|
|
|
static int
|
|
gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor,
|
|
struct visitor_args *arg)
|
|
{
|
|
// Offset of PyObject header from start of memory block.
|
|
Py_ssize_t offset_base = sizeof(PyGC_Head);
|
|
if (_PyMem_DebugEnabled()) {
|
|
// The debug allocator adds two words at the beginning of each block.
|
|
offset_base += 2 * sizeof(size_t);
|
|
}
|
|
|
|
// Objects with Py_TPFLAGS_PREHEADER have two extra fields
|
|
Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*);
|
|
|
|
// visit each thread's heaps for GC objects
|
|
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) {
|
|
struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc;
|
|
|
|
arg->offset = offset_base;
|
|
if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true,
|
|
visitor, arg)) {
|
|
return -1;
|
|
}
|
|
arg->offset = offset_pre;
|
|
if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC_PRE], true,
|
|
visitor, arg)) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// visit blocks in the per-interpreter abandoned pool (from dead threads)
|
|
mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool;
|
|
arg->offset = offset_base;
|
|
if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC, true,
|
|
visitor, arg)) {
|
|
return -1;
|
|
}
|
|
arg->offset = offset_pre;
|
|
if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC_PRE, true,
|
|
visitor, arg)) {
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Visits all GC objects in the interpreter's heaps.
|
|
// NOTE: It is not safe to allocate or free any mimalloc managed memory while
|
|
// this function is running.
|
|
static int
|
|
gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor,
|
|
struct visitor_args *arg)
|
|
{
|
|
// Other threads in the interpreter must be paused so that we can safely
|
|
// traverse their heaps.
|
|
assert(interp->stoptheworld.world_stopped);
|
|
|
|
int err;
|
|
HEAD_LOCK(&_PyRuntime);
|
|
err = gc_visit_heaps_lock_held(interp, visitor, arg);
|
|
HEAD_UNLOCK(&_PyRuntime);
|
|
return err;
|
|
}
|
|
|
|
// Subtract an incoming reference from the computed "gc_refs" refcount.
|
|
static int
|
|
visit_decref(PyObject *op, void *arg)
|
|
{
|
|
if (_PyObject_GC_IS_TRACKED(op) && !_Py_IsImmortal(op)) {
|
|
// If update_refs hasn't reached this object yet, mark it
|
|
// as (tentatively) unreachable and initialize ob_tid to zero.
|
|
gc_maybe_init_refs(op);
|
|
gc_decref(op);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Compute the number of external references to objects in the heap
|
|
// by subtracting internal references from the refcount. The difference is
|
|
// computed in the ob_tid field (we restore it later).
|
|
static bool
|
|
update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, false);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
// Exclude immortal objects from garbage collection
|
|
if (_Py_IsImmortal(op)) {
|
|
op->ob_tid = 0;
|
|
_PyObject_GC_UNTRACK(op);
|
|
gc_clear_unreachable(op);
|
|
return true;
|
|
}
|
|
|
|
// Untrack tuples and dicts as necessary in this pass.
|
|
if (PyTuple_CheckExact(op)) {
|
|
_PyTuple_MaybeUntrack(op);
|
|
if (!_PyObject_GC_IS_TRACKED(op)) {
|
|
gc_restore_refs(op);
|
|
return true;
|
|
}
|
|
}
|
|
else if (PyDict_CheckExact(op)) {
|
|
_PyDict_MaybeUntrack(op);
|
|
if (!_PyObject_GC_IS_TRACKED(op)) {
|
|
gc_restore_refs(op);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
Py_ssize_t refcount = Py_REFCNT(op);
|
|
_PyObject_ASSERT(op, refcount >= 0);
|
|
|
|
// Add the actual refcount to ob_tid.
|
|
gc_maybe_init_refs(op);
|
|
gc_add_refs(op, refcount);
|
|
|
|
// Subtract internal references from ob_tid. Objects with ob_tid > 0
|
|
// are directly reachable from outside containers, and so can't be
|
|
// collected.
|
|
Py_TYPE(op)->tp_traverse(op, visit_decref, NULL);
|
|
return true;
|
|
}
|
|
|
|
static int
|
|
visit_clear_unreachable(PyObject *op, _PyObjectStack *stack)
|
|
{
|
|
if (gc_is_unreachable(op)) {
|
|
_PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op));
|
|
gc_clear_unreachable(op);
|
|
return _PyObjectStack_Push(stack, op);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Transitively clear the unreachable bit on all objects reachable from op.
|
|
static int
|
|
mark_reachable(PyObject *op)
|
|
{
|
|
_PyObjectStack stack = { NULL };
|
|
do {
|
|
traverseproc traverse = Py_TYPE(op)->tp_traverse;
|
|
if (traverse(op, (visitproc)&visit_clear_unreachable, &stack) < 0) {
|
|
_PyObjectStack_Clear(&stack);
|
|
return -1;
|
|
}
|
|
op = _PyObjectStack_Pop(&stack);
|
|
} while (op != NULL);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef GC_DEBUG
|
|
static bool
|
|
validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, false);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
_PyObject_ASSERT(op, gc_is_unreachable(op));
|
|
_PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0,
|
|
"refcount is too small");
|
|
return true;
|
|
}
|
|
#endif
|
|
|
|
static bool
|
|
mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, false);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
if (gc_is_unreachable(op) && gc_get_refs(op) != 0) {
|
|
// Object is reachable but currently marked as unreachable.
|
|
// Mark it as reachable and traverse its pointers to find
|
|
// any other object that may be directly reachable from it.
|
|
gc_clear_unreachable(op);
|
|
|
|
// Transitively mark reachable objects by clearing the unreachable flag.
|
|
if (mark_reachable(op) < 0) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Return true if object has a pre-PEP 442 finalization method. */
|
|
static int
|
|
has_legacy_finalizer(PyObject *op)
|
|
{
|
|
return Py_TYPE(op)->tp_del != NULL;
|
|
}
|
|
|
|
static bool
|
|
scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, false);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
struct collection_state *state = (struct collection_state *)args;
|
|
if (gc_is_unreachable(op)) {
|
|
// Merge and add one to the refcount to prevent deallocation while we
|
|
// are holding on to it in a worklist.
|
|
merge_refcount(op, 1);
|
|
|
|
if (has_legacy_finalizer(op)) {
|
|
// would be unreachable, but has legacy finalizer
|
|
gc_clear_unreachable(op);
|
|
worklist_push(&state->legacy_finalizers, op);
|
|
}
|
|
else {
|
|
worklist_push(&state->unreachable, op);
|
|
}
|
|
}
|
|
else {
|
|
// object is reachable, restore `ob_tid`; we're done with these objects
|
|
gc_restore_tid(op);
|
|
state->gcstate->long_lived_total++;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static int
|
|
move_legacy_finalizer_reachable(struct collection_state *state);
|
|
|
|
static int
|
|
deduce_unreachable_heap(PyInterpreterState *interp,
|
|
struct collection_state *state)
|
|
{
|
|
// Identify objects that are directly reachable from outside the GC heap
|
|
// by computing the difference between the refcount and the number of
|
|
// incoming references.
|
|
gc_visit_heaps(interp, &update_refs, &state->base);
|
|
|
|
#ifdef GC_DEBUG
|
|
// Check that all objects are marked as unreachable and that the computed
|
|
// reference count difference (stored in `ob_tid`) is non-negative.
|
|
gc_visit_heaps(interp, &validate_gc_objects, &state->base);
|
|
#endif
|
|
|
|
// Transitively mark reachable objects by clearing the
|
|
// _PyGC_BITS_UNREACHABLE flag.
|
|
if (gc_visit_heaps(interp, &mark_heap_visitor, &state->base) < 0) {
|
|
return -1;
|
|
}
|
|
|
|
// Identify remaining unreachable objects and push them onto a stack.
|
|
// Restores ob_tid for reachable objects.
|
|
gc_visit_heaps(interp, &scan_heap_visitor, &state->base);
|
|
|
|
if (state->legacy_finalizers.head) {
|
|
// There may be objects reachable from legacy finalizers that are in
|
|
// the unreachable set. We need to mark them as reachable.
|
|
if (move_legacy_finalizer_reachable(state) < 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
move_legacy_finalizer_reachable(struct collection_state *state)
|
|
{
|
|
// Clear the reachable bit on all objects transitively reachable
|
|
// from the objects with legacy finalizers.
|
|
PyObject *op;
|
|
WORKSTACK_FOR_EACH(&state->legacy_finalizers, op) {
|
|
if (mark_reachable(op) < 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
// Move the reachable objects from the unreachable worklist to the legacy
|
|
// finalizer worklist.
|
|
struct worklist_iter iter;
|
|
WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) {
|
|
if (!gc_is_unreachable(op)) {
|
|
worklist_remove(&iter);
|
|
worklist_push(&state->legacy_finalizers, op);
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are
|
|
// enqueued in `wrcb_to_call`, but not invoked yet.
|
|
static void
|
|
clear_weakrefs(struct collection_state *state)
|
|
{
|
|
PyObject *op;
|
|
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
|
if (PyWeakref_Check(op)) {
|
|
// Clear weakrefs that are themselves unreachable to ensure their
|
|
// callbacks will not be executed later from a `tp_clear()`
|
|
// inside delete_garbage(). That would be unsafe: it could
|
|
// resurrect a dead object or access a an already cleared object.
|
|
// See bpo-38006 for one example.
|
|
_PyWeakref_ClearRef((PyWeakReference *)op);
|
|
}
|
|
|
|
if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
|
|
continue;
|
|
}
|
|
|
|
// NOTE: This is never triggered for static types so we can avoid the
|
|
// (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
|
|
PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
|
|
|
|
// `op` may have some weakrefs. March over the list, clear
|
|
// all the weakrefs, and enqueue the weakrefs with callbacks
|
|
// that must be called into wrcb_to_call.
|
|
for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
|
|
// _PyWeakref_ClearRef clears the weakref but leaves
|
|
// the callback pointer intact. Obscure: it also
|
|
// changes *wrlist.
|
|
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
|
|
_PyWeakref_ClearRef(wr);
|
|
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
|
|
|
|
// We do not invoke callbacks for weakrefs that are themselves
|
|
// unreachable. This is partly for historical reasons: weakrefs
|
|
// predate safe object finalization, and a weakref that is itself
|
|
// unreachable may have a callback that resurrects other
|
|
// unreachable objects.
|
|
if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) {
|
|
continue;
|
|
}
|
|
|
|
// Create a new reference so that wr can't go away before we can
|
|
// process it again.
|
|
merge_refcount((PyObject *)wr, 1);
|
|
|
|
// Enqueue weakref to be called later.
|
|
worklist_push(&state->wrcb_to_call, (PyObject *)wr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
call_weakref_callbacks(struct collection_state *state)
|
|
{
|
|
// Invoke the callbacks we decided to honor.
|
|
PyObject *op;
|
|
while ((op = worklist_pop(&state->wrcb_to_call)) != NULL) {
|
|
_PyObject_ASSERT(op, PyWeakref_Check(op));
|
|
|
|
PyWeakReference *wr = (PyWeakReference *)op;
|
|
PyObject *callback = wr->wr_callback;
|
|
_PyObject_ASSERT(op, callback != NULL);
|
|
|
|
/* copy-paste of weakrefobject.c's handle_callback() */
|
|
PyObject *temp = PyObject_CallOneArg(callback, (PyObject *)wr);
|
|
if (temp == NULL) {
|
|
PyErr_WriteUnraisable(callback);
|
|
}
|
|
else {
|
|
Py_DECREF(temp);
|
|
}
|
|
|
|
gc_restore_tid(op);
|
|
Py_DECREF(op); // drop worklist reference
|
|
}
|
|
}
|
|
|
|
|
|
static GCState *
|
|
get_gc_state(void)
|
|
{
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
return &interp->gc;
|
|
}
|
|
|
|
|
|
void
|
|
_PyGC_InitState(GCState *gcstate)
|
|
{
|
|
}
|
|
|
|
|
|
PyStatus
|
|
_PyGC_Init(PyInterpreterState *interp)
|
|
{
|
|
GCState *gcstate = &interp->gc;
|
|
|
|
gcstate->garbage = PyList_New(0);
|
|
if (gcstate->garbage == NULL) {
|
|
return _PyStatus_NO_MEMORY();
|
|
}
|
|
|
|
gcstate->callbacks = PyList_New(0);
|
|
if (gcstate->callbacks == NULL) {
|
|
return _PyStatus_NO_MEMORY();
|
|
}
|
|
|
|
return _PyStatus_OK();
|
|
}
|
|
|
|
static void
|
|
debug_cycle(const char *msg, PyObject *op)
|
|
{
|
|
PySys_FormatStderr("gc: %s <%s %p>\n",
|
|
msg, Py_TYPE(op)->tp_name, op);
|
|
}
|
|
|
|
/* Run first-time finalizers (if any) on all the objects in collectable.
|
|
* Note that this may remove some (or even all) of the objects from the
|
|
* list, due to refcounts falling to 0.
|
|
*/
|
|
static void
|
|
finalize_garbage(struct collection_state *state)
|
|
{
|
|
// NOTE: the unreachable worklist holds a strong reference to the object
|
|
// to prevent it from being deallocated while we are holding on to it.
|
|
PyObject *op;
|
|
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
|
if (!_PyGC_FINALIZED(op)) {
|
|
destructor finalize = Py_TYPE(op)->tp_finalize;
|
|
if (finalize != NULL) {
|
|
_PyGC_SET_FINALIZED(op);
|
|
finalize(op);
|
|
assert(!_PyErr_Occurred(_PyThreadState_GET()));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Break reference cycles by clearing the containers involved.
|
|
static void
|
|
delete_garbage(struct collection_state *state)
|
|
{
|
|
PyThreadState *tstate = _PyThreadState_GET();
|
|
GCState *gcstate = state->gcstate;
|
|
|
|
assert(!_PyErr_Occurred(tstate));
|
|
|
|
PyObject *op;
|
|
while ((op = worklist_pop(&state->objs_to_decref)) != NULL) {
|
|
Py_DECREF(op);
|
|
}
|
|
|
|
while ((op = worklist_pop(&state->unreachable)) != NULL) {
|
|
_PyObject_ASSERT(op, gc_is_unreachable(op));
|
|
|
|
// Clear the unreachable flag.
|
|
gc_clear_unreachable(op);
|
|
|
|
if (!_PyObject_GC_IS_TRACKED(op)) {
|
|
// Object might have been untracked by some other tp_clear() call.
|
|
Py_DECREF(op); // drop the reference from the worklist
|
|
continue;
|
|
}
|
|
|
|
state->collected++;
|
|
|
|
if (gcstate->debug & _PyGC_DEBUG_SAVEALL) {
|
|
assert(gcstate->garbage != NULL);
|
|
if (PyList_Append(gcstate->garbage, op) < 0) {
|
|
_PyErr_Clear(tstate);
|
|
}
|
|
}
|
|
else {
|
|
inquiry clear = Py_TYPE(op)->tp_clear;
|
|
if (clear != NULL) {
|
|
(void) clear(op);
|
|
if (_PyErr_Occurred(tstate)) {
|
|
PyErr_FormatUnraisable("Exception ignored in tp_clear of %s",
|
|
Py_TYPE(op)->tp_name);
|
|
}
|
|
}
|
|
}
|
|
|
|
Py_DECREF(op); // drop the reference from the worklist
|
|
}
|
|
}
|
|
|
|
static void
|
|
handle_legacy_finalizers(struct collection_state *state)
|
|
{
|
|
GCState *gcstate = state->gcstate;
|
|
assert(gcstate->garbage != NULL);
|
|
|
|
PyObject *op;
|
|
while ((op = worklist_pop(&state->legacy_finalizers)) != NULL) {
|
|
state->uncollectable++;
|
|
|
|
if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
|
|
debug_cycle("uncollectable", op);
|
|
}
|
|
|
|
if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
|
|
if (PyList_Append(gcstate->garbage, op) < 0) {
|
|
PyErr_Clear();
|
|
}
|
|
}
|
|
Py_DECREF(op); // drop worklist reference
|
|
}
|
|
}
|
|
|
|
// Show stats for objects in each generations
|
|
static void
|
|
show_stats_each_generations(GCState *gcstate)
|
|
{
|
|
// TODO
|
|
}
|
|
|
|
// Traversal callback for handle_resurrected_objects.
|
|
static int
|
|
visit_decref_unreachable(PyObject *op, void *data)
|
|
{
|
|
if (gc_is_unreachable(op) && _PyObject_GC_IS_TRACKED(op)) {
|
|
op->ob_ref_local -= 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Handle objects that may have resurrected after a call to 'finalize_garbage'.
|
|
static int
|
|
handle_resurrected_objects(struct collection_state *state)
|
|
{
|
|
// First, find externally reachable objects by computing the reference
|
|
// count difference in ob_ref_local. We can't use ob_tid here because
|
|
// that's already used to store the unreachable worklist.
|
|
PyObject *op;
|
|
struct worklist_iter iter;
|
|
WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) {
|
|
assert(gc_is_unreachable(op));
|
|
assert(_Py_REF_IS_MERGED(op->ob_ref_shared));
|
|
|
|
if (!_PyObject_GC_IS_TRACKED(op)) {
|
|
// Object was untracked by a finalizer. Schedule it for a Py_DECREF
|
|
// after we finish with the stop-the-world pause.
|
|
gc_clear_unreachable(op);
|
|
worklist_remove(&iter);
|
|
worklist_push(&state->objs_to_decref, op);
|
|
continue;
|
|
}
|
|
|
|
Py_ssize_t refcount = (op->ob_ref_shared >> _Py_REF_SHARED_SHIFT);
|
|
if (refcount > INT32_MAX) {
|
|
// The refcount is too big to fit in `ob_ref_local`. Mark the
|
|
// object as immortal and bail out.
|
|
gc_clear_unreachable(op);
|
|
worklist_remove(&iter);
|
|
_Py_SetImmortal(op);
|
|
continue;
|
|
}
|
|
|
|
op->ob_ref_local += (uint32_t)refcount;
|
|
|
|
// Subtract one to account for the reference from the worklist.
|
|
op->ob_ref_local -= 1;
|
|
|
|
traverseproc traverse = Py_TYPE(op)->tp_traverse;
|
|
(void) traverse(op,
|
|
(visitproc)visit_decref_unreachable,
|
|
NULL);
|
|
}
|
|
|
|
// Find resurrected objects
|
|
bool any_resurrected = false;
|
|
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
|
int32_t gc_refs = (int32_t)op->ob_ref_local;
|
|
op->ob_ref_local = 0; // restore ob_ref_local
|
|
|
|
_PyObject_ASSERT(op, gc_refs >= 0);
|
|
|
|
if (gc_is_unreachable(op) && gc_refs > 0) {
|
|
// Clear the unreachable flag on any transitively reachable objects
|
|
// from this one.
|
|
any_resurrected = true;
|
|
gc_clear_unreachable(op);
|
|
if (mark_reachable(op) < 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (any_resurrected) {
|
|
// Remove resurrected objects from the unreachable list.
|
|
WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) {
|
|
if (!gc_is_unreachable(op)) {
|
|
_PyObject_ASSERT(op, Py_REFCNT(op) > 1);
|
|
worklist_remove(&iter);
|
|
merge_refcount(op, -1); // remove worklist reference
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef GC_DEBUG
|
|
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
|
_PyObject_ASSERT(op, gc_is_unreachable(op));
|
|
_PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op));
|
|
_PyObject_ASSERT(op, op->ob_ref_local == 0);
|
|
_PyObject_ASSERT(op, _Py_REF_IS_MERGED(op->ob_ref_shared));
|
|
}
|
|
#endif
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Invoke progress callbacks to notify clients that garbage collection
|
|
* is starting or stopping
|
|
*/
|
|
static void
|
|
invoke_gc_callback(PyThreadState *tstate, const char *phase,
|
|
int generation, Py_ssize_t collected,
|
|
Py_ssize_t uncollectable)
|
|
{
|
|
assert(!_PyErr_Occurred(tstate));
|
|
|
|
/* we may get called very early */
|
|
GCState *gcstate = &tstate->interp->gc;
|
|
if (gcstate->callbacks == NULL) {
|
|
return;
|
|
}
|
|
|
|
/* The local variable cannot be rebound, check it for sanity */
|
|
assert(PyList_CheckExact(gcstate->callbacks));
|
|
PyObject *info = NULL;
|
|
if (PyList_GET_SIZE(gcstate->callbacks) != 0) {
|
|
info = Py_BuildValue("{sisnsn}",
|
|
"generation", generation,
|
|
"collected", collected,
|
|
"uncollectable", uncollectable);
|
|
if (info == NULL) {
|
|
PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
|
|
return;
|
|
}
|
|
}
|
|
|
|
PyObject *phase_obj = PyUnicode_FromString(phase);
|
|
if (phase_obj == NULL) {
|
|
Py_XDECREF(info);
|
|
PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks");
|
|
return;
|
|
}
|
|
|
|
PyObject *stack[] = {phase_obj, info};
|
|
for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) {
|
|
PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i);
|
|
Py_INCREF(cb); /* make sure cb doesn't go away */
|
|
r = PyObject_Vectorcall(cb, stack, 2, NULL);
|
|
if (r == NULL) {
|
|
PyErr_WriteUnraisable(cb);
|
|
}
|
|
else {
|
|
Py_DECREF(r);
|
|
}
|
|
Py_DECREF(cb);
|
|
}
|
|
Py_DECREF(phase_obj);
|
|
Py_XDECREF(info);
|
|
assert(!_PyErr_Occurred(tstate));
|
|
}
|
|
|
|
|
|
/* Find the oldest generation (highest numbered) where the count
|
|
* exceeds the threshold. Objects in the that generation and
|
|
* generations younger than it will be collected. */
|
|
static int
|
|
gc_select_generation(GCState *gcstate)
|
|
{
|
|
for (int i = NUM_GENERATIONS-1; i >= 0; i--) {
|
|
if (gcstate->generations[i].count > gcstate->generations[i].threshold) {
|
|
/* Avoid quadratic performance degradation in number
|
|
of tracked objects (see also issue #4074):
|
|
|
|
To limit the cost of garbage collection, there are two strategies;
|
|
- make each collection faster, e.g. by scanning fewer objects
|
|
- do less collections
|
|
This heuristic is about the latter strategy.
|
|
|
|
In addition to the various configurable thresholds, we only trigger a
|
|
full collection if the ratio
|
|
|
|
long_lived_pending / long_lived_total
|
|
|
|
is above a given value (hardwired to 25%).
|
|
|
|
The reason is that, while "non-full" collections (i.e., collections of
|
|
the young and middle generations) will always examine roughly the same
|
|
number of objects -- determined by the aforementioned thresholds --,
|
|
the cost of a full collection is proportional to the total number of
|
|
long-lived objects, which is virtually unbounded.
|
|
|
|
Indeed, it has been remarked that doing a full collection every
|
|
<constant number> of object creations entails a dramatic performance
|
|
degradation in workloads which consist in creating and storing lots of
|
|
long-lived objects (e.g. building a large list of GC-tracked objects would
|
|
show quadratic performance, instead of linear as expected: see issue #4074).
|
|
|
|
Using the above ratio, instead, yields amortized linear performance in
|
|
the total number of objects (the effect of which can be summarized
|
|
thusly: "each full garbage collection is more and more costly as the
|
|
number of objects grows, but we do fewer and fewer of them").
|
|
|
|
This heuristic was suggested by Martin von Löwis on python-dev in
|
|
June 2008. His original analysis and proposal can be found at:
|
|
http://mail.python.org/pipermail/python-dev/2008-June/080579.html
|
|
*/
|
|
if (i == NUM_GENERATIONS - 1
|
|
&& gcstate->long_lived_pending < gcstate->long_lived_total / 4)
|
|
{
|
|
continue;
|
|
}
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
static void
|
|
cleanup_worklist(struct worklist *worklist)
|
|
{
|
|
PyObject *op;
|
|
while ((op = worklist_pop(worklist)) != NULL) {
|
|
gc_restore_tid(op);
|
|
gc_clear_unreachable(op);
|
|
Py_DECREF(op);
|
|
}
|
|
}
|
|
|
|
static void
|
|
gc_collect_internal(PyInterpreterState *interp, struct collection_state *state)
|
|
{
|
|
_PyEval_StopTheWorld(interp);
|
|
// Find unreachable objects
|
|
int err = deduce_unreachable_heap(interp, state);
|
|
if (err < 0) {
|
|
_PyEval_StartTheWorld(interp);
|
|
goto error;
|
|
}
|
|
|
|
// Print debugging information.
|
|
if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) {
|
|
PyObject *op;
|
|
WORKSTACK_FOR_EACH(&state->unreachable, op) {
|
|
debug_cycle("collectable", op);
|
|
}
|
|
}
|
|
|
|
// Clear weakrefs and enqueue callbacks (but do not call them).
|
|
clear_weakrefs(state);
|
|
_PyEval_StartTheWorld(interp);
|
|
|
|
// Call weakref callbacks and finalizers after unpausing other threads to
|
|
// avoid potential deadlocks.
|
|
call_weakref_callbacks(state);
|
|
finalize_garbage(state);
|
|
|
|
// Handle any objects that may have resurrected after the finalization.
|
|
_PyEval_StopTheWorld(interp);
|
|
err = handle_resurrected_objects(state);
|
|
_PyEval_StartTheWorld(interp);
|
|
|
|
if (err < 0) {
|
|
goto error;
|
|
}
|
|
|
|
// Call tp_clear on objects in the unreachable set. This will cause
|
|
// the reference cycles to be broken. It may also cause some objects
|
|
// to be freed.
|
|
delete_garbage(state);
|
|
|
|
// Append objects with legacy finalizers to the "gc.garbage" list.
|
|
handle_legacy_finalizers(state);
|
|
return;
|
|
|
|
error:
|
|
cleanup_worklist(&state->unreachable);
|
|
cleanup_worklist(&state->legacy_finalizers);
|
|
cleanup_worklist(&state->wrcb_to_call);
|
|
cleanup_worklist(&state->objs_to_decref);
|
|
PyErr_NoMemory();
|
|
PyErr_FormatUnraisable("Out of memory during garbage collection");
|
|
}
|
|
|
|
/* This is the main function. Read this to understand how the
|
|
* collection process works. */
|
|
static Py_ssize_t
|
|
gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
|
|
{
|
|
int i;
|
|
Py_ssize_t m = 0; /* # objects collected */
|
|
Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
|
|
_PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
|
|
GCState *gcstate = &tstate->interp->gc;
|
|
|
|
// gc_collect_main() must not be called before _PyGC_Init
|
|
// or after _PyGC_Fini()
|
|
assert(gcstate->garbage != NULL);
|
|
assert(!_PyErr_Occurred(tstate));
|
|
|
|
int expected = 0;
|
|
if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) {
|
|
// Don't start a garbage collection if one is already in progress.
|
|
return 0;
|
|
}
|
|
|
|
if (generation == GENERATION_AUTO) {
|
|
// Select the oldest generation that needs collecting. We will collect
|
|
// objects from that generation and all generations younger than it.
|
|
generation = gc_select_generation(gcstate);
|
|
if (generation < 0) {
|
|
// No generation needs to be collected.
|
|
_Py_atomic_store_int(&gcstate->collecting, 0);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
assert(generation >= 0 && generation < NUM_GENERATIONS);
|
|
|
|
#ifdef Py_STATS
|
|
if (_Py_stats) {
|
|
_Py_stats->object_stats.object_visits = 0;
|
|
}
|
|
#endif
|
|
GC_STAT_ADD(generation, collections, 1);
|
|
|
|
if (reason != _Py_GC_REASON_SHUTDOWN) {
|
|
invoke_gc_callback(tstate, "start", generation, 0, 0);
|
|
}
|
|
|
|
if (gcstate->debug & _PyGC_DEBUG_STATS) {
|
|
PySys_WriteStderr("gc: collecting generation %d...\n", generation);
|
|
show_stats_each_generations(gcstate);
|
|
t1 = _PyTime_GetPerfCounter();
|
|
}
|
|
|
|
if (PyDTrace_GC_START_ENABLED()) {
|
|
PyDTrace_GC_START(generation);
|
|
}
|
|
|
|
/* update collection and allocation counters */
|
|
if (generation+1 < NUM_GENERATIONS) {
|
|
gcstate->generations[generation+1].count += 1;
|
|
}
|
|
for (i = 0; i <= generation; i++) {
|
|
gcstate->generations[i].count = 0;
|
|
}
|
|
|
|
PyInterpreterState *interp = tstate->interp;
|
|
|
|
struct collection_state state = {
|
|
.interp = interp,
|
|
.gcstate = gcstate,
|
|
};
|
|
|
|
gc_collect_internal(interp, &state);
|
|
|
|
m = state.collected;
|
|
n = state.uncollectable;
|
|
|
|
if (gcstate->debug & _PyGC_DEBUG_STATS) {
|
|
double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1);
|
|
PySys_WriteStderr(
|
|
"gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n",
|
|
n+m, n, d);
|
|
}
|
|
|
|
// Clear free lists in all threads
|
|
_PyGC_ClearAllFreeLists(interp);
|
|
|
|
if (_PyErr_Occurred(tstate)) {
|
|
if (reason == _Py_GC_REASON_SHUTDOWN) {
|
|
_PyErr_Clear(tstate);
|
|
}
|
|
else {
|
|
PyErr_FormatUnraisable("Exception ignored in garbage collection");
|
|
}
|
|
}
|
|
|
|
/* Update stats */
|
|
struct gc_generation_stats *stats = &gcstate->generation_stats[generation];
|
|
stats->collections++;
|
|
stats->collected += m;
|
|
stats->uncollectable += n;
|
|
|
|
GC_STAT_ADD(generation, objects_collected, m);
|
|
#ifdef Py_STATS
|
|
if (_Py_stats) {
|
|
GC_STAT_ADD(generation, object_visits,
|
|
_Py_stats->object_stats.object_visits);
|
|
_Py_stats->object_stats.object_visits = 0;
|
|
}
|
|
#endif
|
|
|
|
if (PyDTrace_GC_DONE_ENABLED()) {
|
|
PyDTrace_GC_DONE(n + m);
|
|
}
|
|
|
|
if (reason != _Py_GC_REASON_SHUTDOWN) {
|
|
invoke_gc_callback(tstate, "stop", generation, m, n);
|
|
}
|
|
|
|
assert(!_PyErr_Occurred(tstate));
|
|
_Py_atomic_store_int(&gcstate->collecting, 0);
|
|
return n + m;
|
|
}
|
|
|
|
struct get_referrers_args {
|
|
struct visitor_args base;
|
|
PyObject *objs;
|
|
struct worklist results;
|
|
};
|
|
|
|
static int
|
|
referrersvisit(PyObject* obj, void *arg)
|
|
{
|
|
PyObject *objs = arg;
|
|
Py_ssize_t i;
|
|
for (i = 0; i < PyTuple_GET_SIZE(objs); i++) {
|
|
if (PyTuple_GET_ITEM(objs, i) == obj) {
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static bool
|
|
visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, true);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
struct get_referrers_args *arg = (struct get_referrers_args *)args;
|
|
if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) {
|
|
op->ob_tid = 0; // we will restore the refcount later
|
|
worklist_push(&arg->results, op);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
PyObject *
|
|
_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs)
|
|
{
|
|
PyObject *result = PyList_New(0);
|
|
if (!result) {
|
|
return NULL;
|
|
}
|
|
|
|
_PyEval_StopTheWorld(interp);
|
|
|
|
// Append all objects to a worklist. This abuses ob_tid. We will restore
|
|
// it later. NOTE: We can't append to the PyListObject during
|
|
// gc_visit_heaps() because PyList_Append() may reclaim an abandoned
|
|
// mimalloc segments while we are traversing them.
|
|
struct get_referrers_args args = { .objs = objs };
|
|
gc_visit_heaps(interp, &visit_get_referrers, &args.base);
|
|
|
|
bool error = false;
|
|
PyObject *op;
|
|
while ((op = worklist_pop(&args.results)) != NULL) {
|
|
gc_restore_tid(op);
|
|
if (op != objs && PyList_Append(result, op) < 0) {
|
|
error = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// In case of error, clear the remaining worklist
|
|
while ((op = worklist_pop(&args.results)) != NULL) {
|
|
gc_restore_tid(op);
|
|
}
|
|
|
|
_PyEval_StartTheWorld(interp);
|
|
|
|
if (error) {
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
struct get_objects_args {
|
|
struct visitor_args base;
|
|
struct worklist objects;
|
|
};
|
|
|
|
static bool
|
|
visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, true);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
struct get_objects_args *arg = (struct get_objects_args *)args;
|
|
op->ob_tid = 0; // we will restore the refcount later
|
|
worklist_push(&arg->objects, op);
|
|
|
|
return true;
|
|
}
|
|
|
|
PyObject *
|
|
_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation)
|
|
{
|
|
PyObject *result = PyList_New(0);
|
|
if (!result) {
|
|
return NULL;
|
|
}
|
|
|
|
_PyEval_StopTheWorld(interp);
|
|
|
|
// Append all objects to a worklist. This abuses ob_tid. We will restore
|
|
// it later. NOTE: We can't append to the list during gc_visit_heaps()
|
|
// because PyList_Append() may reclaim an abandoned mimalloc segment
|
|
// while we are traversing it.
|
|
struct get_objects_args args = { 0 };
|
|
gc_visit_heaps(interp, &visit_get_objects, &args.base);
|
|
|
|
bool error = false;
|
|
PyObject *op;
|
|
while ((op = worklist_pop(&args.objects)) != NULL) {
|
|
gc_restore_tid(op);
|
|
if (op != result && PyList_Append(result, op) < 0) {
|
|
error = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// In case of error, clear the remaining worklist
|
|
while ((op = worklist_pop(&args.objects)) != NULL) {
|
|
gc_restore_tid(op);
|
|
}
|
|
|
|
_PyEval_StartTheWorld(interp);
|
|
|
|
if (error) {
|
|
Py_DECREF(result);
|
|
return NULL;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
static bool
|
|
visit_freeze(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, true);
|
|
if (op != NULL) {
|
|
op->ob_gc_bits |= _PyGC_BITS_FROZEN;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
_PyGC_Freeze(PyInterpreterState *interp)
|
|
{
|
|
struct visitor_args args;
|
|
_PyEval_StopTheWorld(interp);
|
|
gc_visit_heaps(interp, &visit_freeze, &args);
|
|
_PyEval_StartTheWorld(interp);
|
|
}
|
|
|
|
static bool
|
|
visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, true);
|
|
if (op != NULL) {
|
|
op->ob_gc_bits &= ~_PyGC_BITS_FROZEN;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void
|
|
_PyGC_Unfreeze(PyInterpreterState *interp)
|
|
{
|
|
struct visitor_args args;
|
|
_PyEval_StopTheWorld(interp);
|
|
gc_visit_heaps(interp, &visit_unfreeze, &args);
|
|
_PyEval_StartTheWorld(interp);
|
|
}
|
|
|
|
struct count_frozen_args {
|
|
struct visitor_args base;
|
|
Py_ssize_t count;
|
|
};
|
|
|
|
static bool
|
|
visit_count_frozen(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, true);
|
|
if (op != NULL && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) {
|
|
struct count_frozen_args *arg = (struct count_frozen_args *)args;
|
|
arg->count++;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
Py_ssize_t
|
|
_PyGC_GetFreezeCount(PyInterpreterState *interp)
|
|
{
|
|
struct count_frozen_args args = { .count = 0 };
|
|
_PyEval_StopTheWorld(interp);
|
|
gc_visit_heaps(interp, &visit_count_frozen, &args.base);
|
|
_PyEval_StartTheWorld(interp);
|
|
return args.count;
|
|
}
|
|
|
|
/* C API for controlling the state of the garbage collector */
|
|
int
|
|
PyGC_Enable(void)
|
|
{
|
|
GCState *gcstate = get_gc_state();
|
|
int old_state = gcstate->enabled;
|
|
gcstate->enabled = 1;
|
|
return old_state;
|
|
}
|
|
|
|
int
|
|
PyGC_Disable(void)
|
|
{
|
|
GCState *gcstate = get_gc_state();
|
|
int old_state = gcstate->enabled;
|
|
gcstate->enabled = 0;
|
|
return old_state;
|
|
}
|
|
|
|
int
|
|
PyGC_IsEnabled(void)
|
|
{
|
|
GCState *gcstate = get_gc_state();
|
|
return gcstate->enabled;
|
|
}
|
|
|
|
/* Public API to invoke gc.collect() from C */
|
|
Py_ssize_t
|
|
PyGC_Collect(void)
|
|
{
|
|
PyThreadState *tstate = _PyThreadState_GET();
|
|
GCState *gcstate = &tstate->interp->gc;
|
|
|
|
if (!gcstate->enabled) {
|
|
return 0;
|
|
}
|
|
|
|
Py_ssize_t n;
|
|
PyObject *exc = _PyErr_GetRaisedException(tstate);
|
|
n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL);
|
|
_PyErr_SetRaisedException(tstate, exc);
|
|
|
|
return n;
|
|
}
|
|
|
|
Py_ssize_t
|
|
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
|
|
{
|
|
return gc_collect_main(tstate, generation, reason);
|
|
}
|
|
|
|
Py_ssize_t
|
|
_PyGC_CollectNoFail(PyThreadState *tstate)
|
|
{
|
|
/* Ideally, this function is only called on interpreter shutdown,
|
|
and therefore not recursively. Unfortunately, when there are daemon
|
|
threads, a daemon thread can start a cyclic garbage collection
|
|
during interpreter shutdown (and then never finish it).
|
|
See http://bugs.python.org/issue8713#msg195178 for an example.
|
|
*/
|
|
return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN);
|
|
}
|
|
|
|
void
|
|
_PyGC_DumpShutdownStats(PyInterpreterState *interp)
|
|
{
|
|
GCState *gcstate = &interp->gc;
|
|
if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL)
|
|
&& gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) {
|
|
const char *message;
|
|
if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
|
|
message = "gc: %zd uncollectable objects at shutdown";
|
|
}
|
|
else {
|
|
message = "gc: %zd uncollectable objects at shutdown; " \
|
|
"use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them";
|
|
}
|
|
/* PyErr_WarnFormat does too many things and we are at shutdown,
|
|
the warnings module's dependencies (e.g. linecache) may be gone
|
|
already. */
|
|
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
|
|
"gc", NULL, message,
|
|
PyList_GET_SIZE(gcstate->garbage)))
|
|
{
|
|
PyErr_WriteUnraisable(NULL);
|
|
}
|
|
if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) {
|
|
PyObject *repr = NULL, *bytes = NULL;
|
|
repr = PyObject_Repr(gcstate->garbage);
|
|
if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) {
|
|
PyErr_WriteUnraisable(gcstate->garbage);
|
|
}
|
|
else {
|
|
PySys_WriteStderr(
|
|
" %s\n",
|
|
PyBytes_AS_STRING(bytes)
|
|
);
|
|
}
|
|
Py_XDECREF(repr);
|
|
Py_XDECREF(bytes);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
_PyGC_Fini(PyInterpreterState *interp)
|
|
{
|
|
GCState *gcstate = &interp->gc;
|
|
Py_CLEAR(gcstate->garbage);
|
|
Py_CLEAR(gcstate->callbacks);
|
|
|
|
/* We expect that none of this interpreters objects are shared
|
|
with other interpreters.
|
|
See https://github.com/python/cpython/issues/90228. */
|
|
}
|
|
|
|
/* for debugging */
|
|
|
|
#ifdef Py_DEBUG
|
|
static int
|
|
visit_validate(PyObject *op, void *parent_raw)
|
|
{
|
|
PyObject *parent = _PyObject_CAST(parent_raw);
|
|
if (_PyObject_IsFreed(op)) {
|
|
_PyObject_ASSERT_FAILED_MSG(parent,
|
|
"PyObject_GC_Track() object is not valid");
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
|
|
/* extension modules might be compiled with GC support so these
|
|
functions must always be available */
|
|
|
|
void
|
|
PyObject_GC_Track(void *op_raw)
|
|
{
|
|
PyObject *op = _PyObject_CAST(op_raw);
|
|
if (_PyObject_GC_IS_TRACKED(op)) {
|
|
_PyObject_ASSERT_FAILED_MSG(op,
|
|
"object already tracked "
|
|
"by the garbage collector");
|
|
}
|
|
_PyObject_GC_TRACK(op);
|
|
|
|
#ifdef Py_DEBUG
|
|
/* Check that the object is valid: validate objects traversed
|
|
by tp_traverse() */
|
|
traverseproc traverse = Py_TYPE(op)->tp_traverse;
|
|
(void)traverse(op, visit_validate, op);
|
|
#endif
|
|
}
|
|
|
|
void
|
|
PyObject_GC_UnTrack(void *op_raw)
|
|
{
|
|
PyObject *op = _PyObject_CAST(op_raw);
|
|
/* Obscure: the Py_TRASHCAN mechanism requires that we be able to
|
|
* call PyObject_GC_UnTrack twice on an object.
|
|
*/
|
|
if (_PyObject_GC_IS_TRACKED(op)) {
|
|
_PyObject_GC_UNTRACK(op);
|
|
}
|
|
}
|
|
|
|
int
|
|
PyObject_IS_GC(PyObject *obj)
|
|
{
|
|
return _PyObject_IS_GC(obj);
|
|
}
|
|
|
|
void
|
|
_Py_ScheduleGC(PyInterpreterState *interp)
|
|
{
|
|
_Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1);
|
|
}
|
|
|
|
void
|
|
_PyObject_GC_Link(PyObject *op)
|
|
{
|
|
PyThreadState *tstate = _PyThreadState_GET();
|
|
GCState *gcstate = &tstate->interp->gc;
|
|
gcstate->generations[0].count++; /* number of allocated GC objects */
|
|
if (gcstate->generations[0].count > gcstate->generations[0].threshold &&
|
|
gcstate->enabled &&
|
|
gcstate->generations[0].threshold &&
|
|
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
|
|
!_PyErr_Occurred(tstate))
|
|
{
|
|
_Py_ScheduleGC(tstate->interp);
|
|
}
|
|
}
|
|
|
|
void
|
|
_Py_RunGC(PyThreadState *tstate)
|
|
{
|
|
gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP);
|
|
}
|
|
|
|
static PyObject *
|
|
gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize)
|
|
{
|
|
PyThreadState *tstate = _PyThreadState_GET();
|
|
if (basicsize > PY_SSIZE_T_MAX - presize) {
|
|
return _PyErr_NoMemory(tstate);
|
|
}
|
|
size_t size = presize + basicsize;
|
|
char *mem = _PyObject_MallocWithType(tp, size);
|
|
if (mem == NULL) {
|
|
return _PyErr_NoMemory(tstate);
|
|
}
|
|
((PyObject **)mem)[0] = NULL;
|
|
((PyObject **)mem)[1] = NULL;
|
|
PyObject *op = (PyObject *)(mem + presize);
|
|
_PyObject_GC_Link(op);
|
|
return op;
|
|
}
|
|
|
|
PyObject *
|
|
_PyObject_GC_New(PyTypeObject *tp)
|
|
{
|
|
size_t presize = _PyType_PreHeaderSize(tp);
|
|
PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp), presize);
|
|
if (op == NULL) {
|
|
return NULL;
|
|
}
|
|
_PyObject_Init(op, tp);
|
|
return op;
|
|
}
|
|
|
|
PyVarObject *
|
|
_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
|
|
{
|
|
PyVarObject *op;
|
|
|
|
if (nitems < 0) {
|
|
PyErr_BadInternalCall();
|
|
return NULL;
|
|
}
|
|
size_t presize = _PyType_PreHeaderSize(tp);
|
|
size_t size = _PyObject_VAR_SIZE(tp, nitems);
|
|
op = (PyVarObject *)gc_alloc(tp, size, presize);
|
|
if (op == NULL) {
|
|
return NULL;
|
|
}
|
|
_PyObject_InitVar(op, tp, nitems);
|
|
return op;
|
|
}
|
|
|
|
PyObject *
|
|
PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size)
|
|
{
|
|
size_t presize = _PyType_PreHeaderSize(tp);
|
|
PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp) + extra_size, presize);
|
|
if (op == NULL) {
|
|
return NULL;
|
|
}
|
|
memset(op, 0, _PyObject_SIZE(tp) + extra_size);
|
|
_PyObject_Init(op, tp);
|
|
return op;
|
|
}
|
|
|
|
PyVarObject *
|
|
_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
|
|
{
|
|
const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
|
|
const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
|
|
_PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op));
|
|
if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) {
|
|
return (PyVarObject *)PyErr_NoMemory();
|
|
}
|
|
char *mem = (char *)op - presize;
|
|
mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize);
|
|
if (mem == NULL) {
|
|
return (PyVarObject *)PyErr_NoMemory();
|
|
}
|
|
op = (PyVarObject *) (mem + presize);
|
|
Py_SET_SIZE(op, nitems);
|
|
return op;
|
|
}
|
|
|
|
void
|
|
PyObject_GC_Del(void *op)
|
|
{
|
|
size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type);
|
|
if (_PyObject_GC_IS_TRACKED(op)) {
|
|
#ifdef Py_DEBUG
|
|
PyObject *exc = PyErr_GetRaisedException();
|
|
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
|
|
"gc", NULL, "Object of type %s is not untracked before destruction",
|
|
((PyObject*)op)->ob_type->tp_name)) {
|
|
PyErr_WriteUnraisable(NULL);
|
|
}
|
|
PyErr_SetRaisedException(exc);
|
|
#endif
|
|
}
|
|
GCState *gcstate = get_gc_state();
|
|
if (gcstate->generations[0].count > 0) {
|
|
gcstate->generations[0].count--;
|
|
}
|
|
PyObject_Free(((char *)op)-presize);
|
|
}
|
|
|
|
int
|
|
PyObject_GC_IsTracked(PyObject* obj)
|
|
{
|
|
if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
PyObject_GC_IsFinalized(PyObject *obj)
|
|
{
|
|
if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
struct custom_visitor_args {
|
|
struct visitor_args base;
|
|
gcvisitobjects_t callback;
|
|
void *arg;
|
|
};
|
|
|
|
static bool
|
|
custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area,
|
|
void *block, size_t block_size, void *args)
|
|
{
|
|
PyObject *op = op_from_block(block, args, false);
|
|
if (op == NULL) {
|
|
return true;
|
|
}
|
|
|
|
struct custom_visitor_args *wrapper = (struct custom_visitor_args *)args;
|
|
if (!wrapper->callback(op, wrapper->arg)) {
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg)
|
|
{
|
|
PyInterpreterState *interp = _PyInterpreterState_GET();
|
|
struct custom_visitor_args wrapper = {
|
|
.callback = callback,
|
|
.arg = arg,
|
|
};
|
|
|
|
_PyEval_StopTheWorld(interp);
|
|
gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base);
|
|
_PyEval_StartTheWorld(interp);
|
|
}
|
|
|
|
/* Clear all free lists
|
|
* All free lists are cleared during the collection of the highest generation.
|
|
* Allocated items in the free list may keep a pymalloc arena occupied.
|
|
* Clearing the free lists may give back memory to the OS earlier.
|
|
* Free-threading version: Since freelists are managed per thread,
|
|
* GC should clear all freelists by traversing all threads.
|
|
*/
|
|
void
|
|
_PyGC_ClearAllFreeLists(PyInterpreterState *interp)
|
|
{
|
|
_PyDict_ClearFreeList(interp);
|
|
|
|
HEAD_LOCK(&_PyRuntime);
|
|
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head;
|
|
while (tstate != NULL) {
|
|
_Py_ClearFreeLists(&tstate->freelist_state, 0);
|
|
tstate = (_PyThreadStateImpl *)tstate->base.next;
|
|
}
|
|
HEAD_UNLOCK(&_PyRuntime);
|
|
}
|
|
|
|
#endif // Py_GIL_DISABLED
|