gh-149101: Implement PEP 788 (GH-149116)

Co-authored-by: Petr Viktorin <encukou@gmail.com>
Co-authored-by: Sam Gross <colesbury@gmail.com>
This commit is contained in:
Peter Bierma 2026-05-06 17:39:30 -04:00 committed by GitHub
parent b2582a6cf2
commit 2b7c28a440
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1630 additions and 197 deletions

View file

@ -19,6 +19,7 @@
#include "pycore_object.h" // _PyDebug_PrintTotalRefs()
#include "pycore_obmalloc.h" // _PyMem_init_obmalloc()
#include "pycore_optimizer.h" // _Py_Executors_InvalidateAll
#include "pycore_parking_lot.h" // _PyParkingLot
#include "pycore_pathconfig.h" // _PyPathConfig_UpdateGlobal()
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "pycore_pylifecycle.h" // _PyErr_Print()
@ -2229,15 +2230,13 @@ interp_has_threads(PyInterpreterState *interp)
/* This needs to check for non-daemon threads only, otherwise we get stuck
* in an infinite loop. */
assert(interp != NULL);
ASSERT_WORLD_STOPPED(interp);
ASSERT_HEAD_IS_LOCKED(interp->runtime);
assert(interp->threads.head != NULL);
if (interp->threads.head->next == NULL) {
// No other threads active, easy way out.
return 0;
}
// We don't have to worry about locking this because the
// world is stopped.
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, tstate) {
if (tstate->_whence == _PyThreadState_WHENCE_THREADING) {
return 1;
@ -2269,9 +2268,7 @@ static int
runtime_has_subinterpreters(_PyRuntimeState *runtime)
{
assert(runtime != NULL);
HEAD_LOCK(runtime);
PyInterpreterState *interp = runtime->interpreters.head;
HEAD_UNLOCK(runtime);
return interp->next != NULL;
}
@ -2280,6 +2277,7 @@ make_pre_finalization_calls(PyThreadState *tstate, int subinterpreters)
{
assert(tstate != NULL);
PyInterpreterState *interp = tstate->interp;
assert(_Py_atomic_load_uintptr(&interp->finalization_guards) != _PyInterpreterGuard_GUARDS_NOT_ALLOWED);
/* Each of these functions can start one another, e.g. a pending call
* could start a thread or vice versa. To ensure that we properly clean
* call everything, we run these in a loop until none of them run anything. */
@ -2306,41 +2304,78 @@ make_pre_finalization_calls(PyThreadState *tstate, int subinterpreters)
if (subinterpreters) {
/* Clean up any lingering subinterpreters.
Two preconditions need to be met here:
- This has to happen before _PyRuntimeState_SetFinalizing is
called, or else threads might get prematurely blocked.
- The world must not be stopped, as finalizers can run.
*/
* Two preconditions need to be met here:
* 1. This has to happen before _PyRuntimeState_SetFinalizing is
* called, or else threads might get prematurely blocked.
* 2. The world must not be stopped, as finalizers can run.
*/
finalize_subinterpreters();
}
// Park until the outstanding finalization guards have been released, so
// that we don't spin constantly while waiting on them.
for (;;) {
uintptr_t num_guards = _Py_atomic_load_uintptr(&interp->finalization_guards);
if (num_guards == 0) {
break;
}
int ret = _PyParkingLot_Park(&interp->finalization_guards,
&num_guards, sizeof(num_guards), -1,
NULL, /*detach=*/1);
if (ret == Py_PARK_OK) {
break;
}
else if (ret == Py_PARK_INTR) {
if (PyErr_CheckSignals() < 0) {
int fatal = PyErr_ExceptionMatches(PyExc_KeyboardInterrupt);
PyErr_FormatUnraisable("Exception ignored while waiting on finalization guards");
if (fatal) {
fputs("Interrupted while waiting on finalization guards\n", stderr);
exit(1);
}
}
assert(!PyErr_Occurred());
}
else {
assert(ret == Py_PARK_AGAIN);
}
}
/* Stop the world to prevent other threads from creating threads or
* atexit callbacks. On the default build, this is simply locked by
* the GIL. For pending calls, we acquire the dedicated mutex, because
* Py_AddPendingCall() can be called without an attached thread state.
*/
PyMutex_Lock(&interp->ceval.pending.mutex);
// XXX Why does _PyThreadState_DeleteList() rely on all interpreters
// being stopped?
_PyEval_StopTheWorldAll(interp->runtime);
HEAD_LOCK(interp->runtime);
int has_subinterpreters = subinterpreters
? runtime_has_subinterpreters(interp->runtime)
: 0;
uintptr_t guards_expected = 0;
int should_continue = (interp_has_threads(interp)
|| interp_has_atexit_callbacks(interp)
|| interp_has_pending_calls(interp)
|| has_subinterpreters);
if (!should_continue) {
break;
// We only want to prevent new guards once we're sure that we
// won't be running another pre-finalization cycle.
if (_Py_atomic_compare_exchange_uintptr(&interp->finalization_guards,
&guards_expected,
_PyInterpreterGuard_GUARDS_NOT_ALLOWED) == 1) {
HEAD_UNLOCK(interp->runtime);
break;
}
}
HEAD_UNLOCK(interp->runtime);
_PyEval_StartTheWorldAll(interp->runtime);
PyMutex_Unlock(&interp->ceval.pending.mutex);
}
assert(PyMutex_IsLocked(&interp->ceval.pending.mutex));
assert(_Py_atomic_load_uintptr(&interp->finalization_guards) == _PyInterpreterGuard_GUARDS_NOT_ALLOWED);
ASSERT_WORLD_STOPPED(interp);
}

View file

@ -2889,34 +2889,40 @@ PyGILState_Check(void)
return (tstate == tcur);
}
/* Acquire a finalization guard for the main interpreter.
 *
 * A temporary view of the main interpreter is created, converted into a
 * guard, and closed again before returning.
 *
 * Returns the new guard, or NULL if either the view or the guard could not
 * be obtained. The caller closes the guard with PyInterpreterGuard_Close().
 */
static PyInterpreterGuard *
get_main_interp_guard(void)
{
    PyInterpreterGuard *main_guard = NULL;
    PyInterpreterView *main_view = PyInterpreterView_FromMain();
    if (main_view != NULL) {
        main_guard = PyInterpreterGuard_FromView(main_view);
        // The view was only needed to look the interpreter up.
        PyInterpreterView_Close(main_view);
    }
    return main_guard;
}
PyGILState_STATE
PyGILState_Ensure(void)
{
_PyRuntimeState *runtime = &_PyRuntime;
/* Note that we do not auto-init Python here - apart from
potential races with 2 threads auto-initializing, pep-311
spells out other issues. Embedders are expected to have
called Py_Initialize(). */
/* Ensure that _PyEval_InitThreads() and _PyGILState_Init() have been
called by Py_Initialize()
TODO: This isn't thread-safe. There's no protection here against
concurrent finalization of the interpreter; it's simply a guard
for *after* the interpreter has finalized.
*/
if (!_PyEval_ThreadsInitialized() || runtime->gilstate.autoInterpreterState == NULL) {
PyThread_hang_thread();
}
PyThreadState *tcur = gilstate_get();
int has_gil;
if (tcur == NULL) {
/* Create a new Python thread state for this thread */
// XXX Use PyInterpreterState_EnsureThreadState()?
tcur = new_threadstate(runtime->gilstate.autoInterpreterState,
_PyThreadState_WHENCE_GILSTATE);
PyInterpreterGuard *guard = get_main_interp_guard();
if (guard == NULL) {
// The main interpreter has finished, so we don't have
// any interpreter to make a thread state for. Hang the
// thread to act as failure.
PyThread_hang_thread();
}
tcur = new_threadstate(guard->interp,
_PyThreadState_WHENCE_C_API);
if (tcur == NULL) {
Py_FatalError("Couldn't create thread-state for new thread");
}
@ -2928,6 +2934,7 @@ PyGILState_Ensure(void)
assert(tcur->gilstate_counter == 1);
tcur->gilstate_counter = 0;
has_gil = 0; /* new thread state is never current */
PyInterpreterGuard_Close(guard);
}
else {
has_gil = holds_gil(tcur);
@ -3309,3 +3316,277 @@ _Py_GetMainConfig(void)
}
return _PyInterpreterState_GetConfig(interp);
}
/* Return the current value of the interpreter's finalization-guard counter.
 *
 * The counter is read atomically and converted to Py_ssize_t.
 * NOTE(review): the assertion presumes the stored value is never one that
 * becomes negative once converted (e.g. a sentinel) -- confirm with callers.
 */
Py_ssize_t
_PyInterpreterState_GuardCountdown(PyInterpreterState *interp)
{
    assert(interp != NULL);
    Py_ssize_t outstanding = _Py_atomic_load_uintptr(
        &interp->finalization_guards);
    assert(outstanding >= 0);
    return outstanding;
}
/* Return the interpreter that the given guard refers to.
 *
 * Both the guard and its interpreter pointer must be non-NULL.
 */
PyInterpreterState *
_PyInterpreterGuard_GetInterpreter(PyInterpreterGuard *guard)
{
    assert(guard != NULL);
    PyInterpreterState *guarded = guard->interp;
    assert(guarded != NULL);
    return guarded;
}
/* Try to register one more finalization guard on `interp`.
 *
 * The guard counter is incremented with a compare-and-exchange retry loop.
 * If the counter holds _PyInterpreterGuard_GUARDS_NOT_ALLOWED, nothing is
 * modified and -1 is returned. Otherwise the counter is bumped by one,
 * `guard->interp` is set to `interp`, and 0 is returned.
 */
static int
try_acquire_interp_guard(PyInterpreterState *interp, PyInterpreterGuard *guard)
{
    assert(interp != NULL);

    for (;;) {
        uintptr_t seen = _Py_atomic_load_uintptr(&interp->finalization_guards);
        if (seen == _PyInterpreterGuard_GUARDS_NOT_ALLOWED) {
            // New guards are refused; leave the counter untouched.
            return -1;
        }
        if (_Py_atomic_compare_exchange_uintptr(&interp->finalization_guards,
                                                &seen,
                                                seen + 1) != 0) {
            // Our increment went through.
            break;
        }
        // Somebody else changed the counter in the meantime; try again.
    }

    assert(_Py_atomic_load_uintptr(&interp->finalization_guards) > 0);
    assert(_Py_atomic_load_uintptr(&interp->finalization_guards) != _PyInterpreterGuard_GUARDS_NOT_ALLOWED);
    guard->interp = interp;
    return 0;
}
/* Create a finalization guard for the current interpreter.
 *
 * Returns a newly allocated guard on success. On failure, returns NULL with
 * an exception set: MemoryError if the allocation failed, or
 * PythonFinalizationError if the interpreter no longer accepts guards.
 */
PyInterpreterGuard *
PyInterpreterGuard_FromCurrent(void)
{
    PyInterpreterState *current = _PyInterpreterState_GET();
    assert(current != NULL);

    PyInterpreterGuard *new_guard = PyMem_RawMalloc(sizeof(PyInterpreterGuard));
    if (new_guard == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    if (try_acquire_interp_guard(current, new_guard) == 0) {
        return new_guard;
    }

    // Guards are no longer allowed: release the unused allocation.
    PyMem_RawFree(new_guard);
    PyErr_SetString(PyExc_PythonFinalizationError,
                    "cannot acquire finalization guard anymore");
    return NULL;
}
/* Release a finalization guard and free it.
 *
 * The interpreter's guard counter is decremented atomically. When the
 * value seen before the decrement was 1 (i.e. this was the last outstanding
 * guard), every thread parked on the counter is woken up.
 *
 * `guard` must be non-NULL and must not be used after this call.
 */
void
PyInterpreterGuard_Close(PyInterpreterGuard *guard)
{
    // Same precondition check as the other guard/view accessors.
    assert(guard != NULL);
    PyInterpreterState *interp = guard->interp;
    assert(interp != NULL);
    assert(_Py_atomic_load_uintptr(&interp->finalization_guards) != _PyInterpreterGuard_GUARDS_NOT_ALLOWED);

    // Adding -1 to the unsigned counter wraps around to a decrement by one.
    uintptr_t old_value = _Py_atomic_add_uintptr(&interp->finalization_guards, -1);
    // Catch an unbalanced close before acting on the value.
    assert(old_value > 0);
    if (old_value == 1) {
        _PyParkingLot_UnparkAll(&interp->finalization_guards);
    }
    PyMem_RawFree(guard);
}
/* Create a view that records the ID of the current interpreter.
 *
 * Returns a newly allocated view, or NULL with MemoryError set if the
 * allocation failed.
 */
PyInterpreterView *
PyInterpreterView_FromCurrent(void)
{
    PyInterpreterState *current = _PyInterpreterState_GET();
    assert(current != NULL);

    // The raw allocator is required because PyInterpreterView_Close() may be
    // called without an attached thread state.
    PyInterpreterView *new_view = PyMem_RawMalloc(sizeof(PyInterpreterView));
    if (new_view != NULL) {
        new_view->id = current->id;
        return new_view;
    }

    PyErr_NoMemory();
    return NULL;
}
/* Free an interpreter view.
 *
 * `view` must be non-NULL; its memory is returned to the raw allocator.
 */
void
PyInterpreterView_Close(PyInterpreterView *view)
{
    assert(view != NULL);

    PyMem_RawFree(view);
}
/* Create a finalization guard for the interpreter identified by `view`.
 *
 * The interpreter is looked up by ID while the runtime head lock is held,
 * and the guard is acquired under that same lock.
 *
 * Returns the new guard, or NULL (no exception is set here) if the
 * allocation failed, no interpreter with that ID was found, or the
 * interpreter no longer accepts guards.
 */
PyInterpreterGuard *
PyInterpreterGuard_FromView(PyInterpreterView *view)
{
    assert(view != NULL);
    const int64_t wanted_id = view->id;
    assert(wanted_id >= 0);

    // Allocate before taking the runtime lock: PyMem_RawMalloc() might call
    // some callback (such as tracemalloc) that tries to re-entrantly
    // acquire the lock.
    PyInterpreterGuard *new_guard = PyMem_RawMalloc(sizeof(PyInterpreterGuard));
    if (new_guard == NULL) {
        return NULL;
    }

    _PyRuntimeState *runtime = &_PyRuntime;
    int acquired = -1;

    // Interpreters cannot be deleted while we hold the runtime lock.
    HEAD_LOCK(runtime);
    PyInterpreterState *found = interp_look_up_id(runtime, wanted_id);
    if (found != NULL) {
        acquired = try_acquire_interp_guard(found, new_guard);
    }
    HEAD_UNLOCK(runtime);

    if (acquired < 0) {
        // Either the lookup or the acquisition failed.
        PyMem_RawFree(new_guard);
        return NULL;
    }

    assert(new_guard->interp != NULL);
    return new_guard;
}
/* Create a view that refers to the main interpreter.
 *
 * Returns a newly allocated view, or NULL if the allocation failed. No
 * exception is set on failure.
 */
PyInterpreterView *
PyInterpreterView_FromMain(void)
{
    PyInterpreterView *main_view = PyMem_RawMalloc(sizeof(PyInterpreterView));
    if (main_view != NULL) {
        // The main interpreter always has an ID of zero.
        main_view->id = 0;
    }
    return main_view;
}
/* Token value that PyThreadState_Ensure() returns when there was no attached
 * thread state to hand back, and that PyThreadState_Release() compares
 * against. The pointer is initialized to its own address. */
static const PyThreadStateToken *_no_tstate_sentinel = (const PyThreadStateToken *)&_no_tstate_sentinel;
// Non-const spelling of the sentinel, for returning and comparing tokens.
#define NO_TSTATE_SENTINEL ((PyThreadStateToken *)_no_tstate_sentinel)
/* Make sure the calling thread has an attached thread state for the
 * interpreter protected by `guard`.
 *
 * Three cases are handled, in order:
 *   1. The attached thread state already belongs to that interpreter: its
 *      ensure counter is bumped and the thread state itself is returned as
 *      the token.
 *   2. The thread's gilstate thread state belongs to that interpreter: its
 *      ensure counter is bumped, it is attached, and NO_TSTATE_SENTINEL is
 *      returned.
 *   3. Otherwise a new thread state is created and marked for deletion on
 *      release. If another thread state was attached, it is swapped out and
 *      returned as the token; otherwise the new one is attached and
 *      NO_TSTATE_SENTINEL is returned.
 *
 * Returns NULL if a new thread state was needed but could not be created.
 * The returned token must be passed to PyThreadState_Release().
 */
PyThreadStateToken *
PyThreadState_Ensure(PyInterpreterGuard *guard)
{
    assert(guard != NULL);
    PyInterpreterState *interp = guard->interp;
    assert(interp != NULL);

    PyThreadState *attached_tstate = current_fast_get();
    if (attached_tstate != NULL && attached_tstate->interp == interp) {
        /* Yay! We already have an attached thread state that matches. */
        ++attached_tstate->ensure.counter;
        // Cast explicitly, as the other token <-> thread state conversions do.
        return (PyThreadStateToken *)attached_tstate;
    }

    PyThreadState *detached_gilstate = gilstate_get();
    if (detached_gilstate != NULL && detached_gilstate->interp == interp) {
        /* There's a detached thread state that works. */
        // NOTE(review): this assumes a thread never has another interpreter's
        // thread state attached while its gilstate thread state matches
        // `interp` -- confirm.
        assert(attached_tstate == NULL);
        ++detached_gilstate->ensure.counter;
        _PyThreadState_Attach(detached_gilstate);
        return NO_TSTATE_SENTINEL;
    }

    PyThreadState *fresh_tstate = _PyThreadState_NewBound(interp,
                                                          _PyThreadState_WHENCE_C_API);
    if (fresh_tstate == NULL) {
        return NULL;
    }
    fresh_tstate->ensure.counter = 1;
    // This thread state exists only for this Ensure/Release pair.
    fresh_tstate->ensure.delete_on_release = 1;

    if (attached_tstate != NULL) {
        // The previously attached thread state becomes the token.
        return (PyThreadStateToken *)PyThreadState_Swap(fresh_tstate);
    }
    _PyThreadState_Attach(fresh_tstate);
    return NO_TSTATE_SENTINEL;
}
/* Like PyThreadState_Ensure(), but starting from an interpreter view.
 *
 * A guard is acquired from `view` and handed to PyThreadState_Ensure(). On
 * success the guard is stored on the now-attached thread state, unless that
 * thread state already owns one, in which case the new guard is closed
 * straight away.
 *
 * Returns the token from PyThreadState_Ensure(), or NULL if the guard could
 * not be acquired or no thread state could be ensured.
 */
PyThreadStateToken *
PyThreadState_EnsureFromView(PyInterpreterView *view)
{
    assert(view != NULL);

    PyInterpreterGuard *fresh_guard = PyInterpreterGuard_FromView(view);
    if (fresh_guard == NULL) {
        return NULL;
    }

    PyThreadStateToken *token = (PyThreadStateToken *)PyThreadState_Ensure(fresh_guard);
    if (token == NULL) {
        PyInterpreterGuard_Close(fresh_guard);
        return NULL;
    }

    PyThreadState *now_attached = current_fast_get();
    assert(now_attached != NULL);
    if (now_attached->ensure.owned_guard == NULL) {
        // First owner: the thread state keeps the guard.
        assert(now_attached->ensure.owned_guard == NULL);
        now_attached->ensure.owned_guard = fresh_guard;
    }
    else {
        // A guard for this interpreter is already owned; ours is redundant.
        assert(now_attached->ensure.owned_guard->interp == fresh_guard->interp);
        PyInterpreterGuard_Close(fresh_guard);
    }
    return token;
}
/* Undo one PyThreadState_Ensure() / PyThreadState_EnsureFromView() call.
 *
 * `token` is the value returned by the matching ensure call: either
 * NO_TSTATE_SENTINEL or the thread state that should be attached again.
 *
 * The attached thread state's ensure counter is decremented. While it is
 * still non-zero only the attach state is adjusted. When it reaches zero,
 * the thread state is cleared and deleted if it was created by the ensure
 * call, the token's thread state (or none) is swapped back in, and any guard
 * owned by the thread state is closed.
 *
 * Calling this more often than the ensure functions is a fatal error.
 */
void
PyThreadState_Release(PyThreadStateToken *token)
{
    PyThreadState *tstate = current_fast_get();
    _Py_EnsureTstateNotNULL(tstate);
    Py_ssize_t remaining = --tstate->ensure.counter;
    if (remaining < 0) {
        Py_FatalError("PyThreadState_Release() called more times than PyThreadState_Ensure()");
    }
    if (remaining != 0) {
        // If the corresponding PyThreadState_Ensure() call used a detached
        // thread state, we want to detach it again.
        if (token == NO_TSTATE_SENTINEL) {
            PyThreadState_Swap(NULL);
        }
        return;
    }
    // Outermost release: work out which thread state (if any) to re-attach.
    PyThreadState *to_restore;
    if (token == NO_TSTATE_SENTINEL) {
        to_restore = NULL;
    }
    else {
        to_restore = (PyThreadState *)token;
    }
    // Read the guard now; it is closed at the very end, after the thread
    // state may already have been deleted.
    PyInterpreterGuard *owned_guard = tstate->ensure.owned_guard;
    assert(tstate->ensure.delete_on_release == 1 || tstate->ensure.delete_on_release == 0);
    if (tstate->ensure.delete_on_release) {
        // The counter is held at a non-zero value for the duration of the
        // clear. NOTE(review): presumably so that an Ensure/Release pair run
        // while clearing does not reach zero and re-enter this teardown --
        // confirm.
        ++tstate->ensure.counter;
        PyThreadState_Clear(tstate);
        --tstate->ensure.counter;
    }
    else if (owned_guard != NULL) {
        // The thread state outlives this call, so drop its reference to the
        // guard that is about to be closed.
        tstate->ensure.owned_guard = NULL;
    }
    PyThreadState *check_tstate = PyThreadState_Swap(to_restore);
    (void)check_tstate;
    assert(check_tstate == tstate);
    // Delete only after swapping the thread state out above.
    if (tstate->ensure.delete_on_release) {
        PyThreadState_Delete(tstate);
    }
    // Close the guard last, once the thread state is no longer attached.
    if (owned_guard != NULL) {
        PyInterpreterGuard_Close(owned_guard);
    }
}