gh-148937: revert process RSS based GC deferral (#149475)

This commit is contained in:
Neil Schemenauer 2026-05-07 04:32:14 -07:00 committed by GitHub
parent b3cfd36ba4
commit 13188dbf85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 7 additions and 225 deletions

View file

@ -133,11 +133,6 @@ The :mod:`!gc` module provides the following functions:
With the third generation, things are a bit more complicated,
see `Collecting the oldest generation <https://github.com/python/cpython/blob/ff0ef0a54bef26fc507fbf9b7a6009eb7d3f17f5/InternalDocs/garbage_collector.md#collecting-the-oldest-generation>`_ for more information.
In the free-threaded build, the increase in process memory usage is also
checked before running the collector. If the memory usage has not increased
by 10% since the last collection and the net number of object allocations
has not exceeded 40 times *threshold0*, the collection is not run.
See `Garbage collector design <https://github.com/python/cpython/blob/3.15/InternalDocs/garbage_collector.md>`_ for more information.
.. versionchanged:: 3.14

View file

@ -263,16 +263,6 @@ struct _gc_runtime_state {
#ifdef Py_GIL_DISABLED
/* True if gc.freeze() has been used. */
int freeze_active;
/* Memory usage of the process (RSS + swap) after last GC. */
Py_ssize_t last_mem;
/* This accumulates the new object count whenever collection is deferred
due to the RSS increase condition not being met. Reset on collection. */
Py_ssize_t deferred_count;
/* Mutex held for gc_should_collect_mem_usage(). */
PyMutex mutex;
#else
PyGC_Head *generation0;
#endif

View file

@ -0,0 +1,6 @@
Revert the process size based deferral of garbage collection (GH-133464).
The performance issue this change resolves is also fixed by GH-142562. This
approach has the problem that process size as seen by the OS (e.g. the
resident size or RSS) does not immediately decrease after cyclic garbage is
freed since mimalloc defers returning memory to the OS. This change applies
to the free-threaded GC only.

View file

@ -17,30 +17,6 @@
#include "pydtrace.h"
// Platform-specific includes for get_process_mem_usage().
#ifdef _WIN32
#include <windows.h>
#include <psapi.h> // For GetProcessMemoryInfo
#elif defined(__linux__)
#include <unistd.h> // For sysconf, getpid
#elif defined(__APPLE__)
#include <mach/mach.h>
#include <mach/task.h> // Required for TASK_VM_INFO
#include <unistd.h> // For sysconf, getpid
#elif defined(__FreeBSD__)
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
#include <kvm.h>
#include <unistd.h> // For sysconf, getpid
#include <fcntl.h> // For O_RDONLY
#include <limits.h> // For _POSIX2_LINE_MAX
#elif defined(__OpenBSD__)
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h> // For kinfo_proc
#include <unistd.h> // For sysconf, getpid
#endif
// enable the "mark alive" pass of GC
#define GC_ENABLE_MARK_ALIVE 1
@ -2016,185 +1992,6 @@ cleanup_worklist(struct worklist *worklist)
}
}
// Return the memory usage (typically RSS + swap) of the process, in units of
// KB. Returns -1 if this operation is not supported or on failure.
//
// What is measured depends on the platform:
//   - Windows: WorkingSetSize + PagefileUsage
//   - Linux:   VmRSS + VmSwap as listed in /proc/self/status
//   - macOS:   phys_footprint
//   - FreeBSD, OpenBSD: RSS only
//   - anything else: unsupported, always -1
static Py_ssize_t
get_process_mem_usage(void)
{
#ifdef _WIN32
    // Windows implementation using GetProcessMemoryInfo
    // Returns WorkingSetSize + PagefileUsage
    PROCESS_MEMORY_COUNTERS pmc;
    HANDLE hProcess = GetCurrentProcess();
    if (NULL == hProcess) {
        // Should not happen for the current process
        return -1;
    }
    // GetProcessMemoryInfo returns non-zero on success
    if (!GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc))) {
        return -1;
    }
    // Values are in bytes, convert to KB. Widen before adding so that the
    // sum cannot wrap around when SIZE_T is only 32 bits wide.
    unsigned long long total_bytes =
        (unsigned long long)pmc.WorkingSetSize + pmc.PagefileUsage;
    return (Py_ssize_t)(total_bytes / 1024);

#elif defined(__linux__)
    FILE *fp = fopen("/proc/self/status", "r");
    if (fp == NULL) {
        return -1;
    }

    char line_buffer[256];
    long long rss_kb = -1;
    long long swap_kb = -1;

    while (fgets(line_buffer, sizeof(line_buffer), fp) != NULL) {
        if (rss_kb == -1 && strncmp(line_buffer, "VmRSS:", 6) == 0) {
            // Keep the "not found" marker if the value cannot be parsed.
            if (sscanf(line_buffer + 6, "%lld", &rss_kb) != 1) {
                rss_kb = -1;
            }
        }
        else if (swap_kb == -1 && strncmp(line_buffer, "VmSwap:", 7) == 0) {
            if (sscanf(line_buffer + 7, "%lld", &swap_kb) != 1) {
                swap_kb = -1;
            }
        }
        if (rss_kb != -1 && swap_kb != -1) {
            break;  // Found both
        }
    }
    fclose(fp);

    if (rss_kb != -1 && swap_kb != -1) {
        return (Py_ssize_t)(rss_kb + swap_kb);
    }
    return -1;

#elif defined(__APPLE__)
    // --- MacOS (Darwin) ---
    // Returns phys_footprint (RAM + compressed memory)
    task_vm_info_data_t vm_info;
    mach_msg_type_number_t count = TASK_VM_INFO_COUNT;
    kern_return_t kerr;

    kerr = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vm_info, &count);
    if (kerr != KERN_SUCCESS) {
        return -1;
    }
    // phys_footprint is in bytes. Convert to KB.
    return (Py_ssize_t)(vm_info.phys_footprint / 1024);

#elif defined(__FreeBSD__)
    // NOTE: Returns RSS only. Per-process swap usage isn't readily available
    long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
    if (page_size_kb <= 0) {
        return -1;
    }

    // Using /dev/null for vmcore avoids needing dump file.
    // NULL for kernel file uses running kernel.
    char errbuf[_POSIX2_LINE_MAX];  // For kvm error messages
    kvm_t *kd = kvm_openfiles(NULL, "/dev/null", NULL, O_RDONLY, errbuf);
    if (kd == NULL) {
        return -1;
    }

    // KERN_PROC_PID filters for the specific process ID
    // n_procs will contain the number of processes returned (should be 1 or 0)
    pid_t pid = getpid();
    int n_procs;
    struct kinfo_proc *kp = kvm_getprocs(kd, KERN_PROC_PID, pid, &n_procs);
    if (kp == NULL) {
        kvm_close(kd);
        return -1;
    }

    Py_ssize_t rss_kb = -1;
    if (n_procs > 0) {
        // kp[0] contains the info for our process
        // ki_rssize is in pages. Convert to KB.
        rss_kb = (Py_ssize_t)kp->ki_rssize * page_size_kb;
    }
    else {
        // Process with PID not found, shouldn't happen for self.
        rss_kb = -1;
    }

    kvm_close(kd);
    return rss_kb;

#elif defined(__OpenBSD__)
    // NOTE: Returns RSS only. Per-process swap usage isn't readily available
    long page_size_kb = sysconf(_SC_PAGESIZE) / 1024;
    if (page_size_kb <= 0) {
        return -1;
    }

    struct kinfo_proc kp;
    pid_t pid = getpid();
    int mib[6];
    size_t len = sizeof(kp);

    mib[0] = CTL_KERN;
    mib[1] = KERN_PROC;
    mib[2] = KERN_PROC_PID;
    mib[3] = pid;
    mib[4] = sizeof(struct kinfo_proc);  // size of the structure we want
    mib[5] = 1;                          // want 1 structure back

    if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1) {
        return -1;
    }

    if (len > 0) {
        // p_vm_rssize is in pages on OpenBSD. Convert to KB.
        return (Py_ssize_t)kp.p_vm_rssize * page_size_kb;
    }
    else {
        // Process info not returned
        return -1;
    }

#else
    // Unsupported platform
    return -1;
#endif
}
// Decide whether a collection should run now, based on how much the process
// memory usage has grown since the value stored in gcstate->last_mem.
//
// Returns true when the collection should run:
//   - the process memory usage could not be read,
//   - more than 40 * young.threshold new objects have been deferred, or
//   - memory usage grew by more than max(last_mem / 10, 128) KB.
//
// Returns false when the collection is deferred. In that case the young
// object count is reset to zero and added to gcstate->deferred_count while
// holding gcstate->mutex.
static bool
gc_should_collect_mem_usage(GCState *gcstate)
{
    Py_ssize_t mem = get_process_mem_usage();
    if (mem < 0) {
        // Reading process memory usage is not supported or failed.
        return true;
    }
    int threshold = gcstate->young.threshold;
    Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed(&gcstate->deferred_count);
    // Widen before multiplying: the threshold is an int that can be set to a
    // large value, and "threshold * 40" evaluated as int would overflow
    // (undefined behaviour for signed integers).
    long long deferred_limit = (long long)threshold * 40;
    if ((long long)deferred > deferred_limit) {
        // Too many new container objects since last GC, even though memory use
        // might not have increased much. This is intended to avoid resource
        // exhaustion if some objects consume resources but don't result in a
        // memory usage increase. We use 40x as the factor here because older
        // versions of Python would do full collections after roughly every
        // 70,000 new container objects.
        return true;
    }
    Py_ssize_t last_mem = _Py_atomic_load_ssize_relaxed(&gcstate->last_mem);
    Py_ssize_t mem_threshold = Py_MAX(last_mem / 10, 128);
    if ((mem - last_mem) > mem_threshold) {
        // The process memory usage has increased too much, do a collection.
        return true;
    }
    else {
        // The memory usage has not increased enough, defer the collection and
        // clear the young object count so we don't check memory usage again
        // on the next call to gc_should_collect().
        PyMutex_Lock(&gcstate->mutex);
        int young_count = _Py_atomic_exchange_int(&gcstate->young.count, 0);
        _Py_atomic_store_ssize_relaxed(&gcstate->deferred_count,
                                       gcstate->deferred_count + young_count);
        PyMutex_Unlock(&gcstate->mutex);
        return false;
    }
}
static bool
gc_should_collect(GCState *gcstate)
{
@ -2214,7 +2011,7 @@ gc_should_collect(GCState *gcstate)
// objects.
return false;
}
return gc_should_collect_mem_usage(gcstate);
return true;
}
static void
@ -2275,7 +2072,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
}
state->gcstate->young.count = 0;
state->gcstate->deferred_count = 0;
for (int i = 1; i <= generation; ++i) {
state->gcstate->old[i-1].count = 0;
}
@ -2379,11 +2175,6 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
// to be freed.
delete_garbage(state);
// Store the current memory usage, can be smaller now if breaking cycles
// freed some memory.
Py_ssize_t last_mem = get_process_mem_usage();
_Py_atomic_store_ssize_relaxed(&state->gcstate->last_mem, last_mem);
// Append objects with legacy finalizers to the "gc.garbage" list.
handle_legacy_finalizers(state);
}