gh-111924: Use PyMutex for runtime global locks.

This replaces some usages of PyThread_type_lock with PyMutex, which
does not require memory allocation to initialize.
This commit is contained in:
Sam Gross 2023-11-16 16:41:21 -05:00
parent 974847be44
commit 628f6eb003
18 changed files with 91 additions and 251 deletions

View file

@ -589,9 +589,7 @@ _PyEval_ReInitThreads(PyThreadState *tstate)
take_gil(tstate);
struct _pending_calls *pending = &tstate->interp->ceval.pending;
if (_PyThread_at_fork_reinit(&pending->lock) < 0) {
return _PyStatus_ERR("Can't reinitialize pending calls lock");
}
pending->mutex = (PyMutex){0};
/* Destroy all threads except the current one */
_PyThreadState_DeleteExcept(tstate);
@ -720,13 +718,10 @@ _PyEval_AddPendingCall(PyInterpreterState *interp,
assert(_Py_IsMainInterpreter(interp));
pending = &_PyRuntime.ceval.pending_mainthread;
}
/* Ensure that _PyEval_InitState() was called
and that _PyEval_FiniState() is not called yet. */
assert(pending->lock != NULL);
PyThread_acquire_lock(pending->lock, WAIT_LOCK);
PyMutex_Lock(&pending->mutex);
int result = _push_pending_call(pending, func, arg, flags);
PyThread_release_lock(pending->lock);
PyMutex_Unlock(&pending->mutex);
/* signal main loop */
SIGNAL_PENDING_CALLS(interp);
@ -768,9 +763,9 @@ _make_pending_calls(struct _pending_calls *pending)
int flags = 0;
/* pop one item off the queue while holding the lock */
PyThread_acquire_lock(pending->lock, WAIT_LOCK);
PyMutex_Lock(&pending->mutex);
_pop_pending_call(pending, &func, &arg, &flags);
PyThread_release_lock(pending->lock);
PyMutex_Unlock(&pending->mutex);
/* having released the lock, perform the callback */
if (func == NULL) {
@ -795,7 +790,7 @@ make_pending_calls(PyInterpreterState *interp)
/* Only one thread (per interpreter) may run the pending calls
at once. In the same way, we don't do recursive pending calls. */
PyThread_acquire_lock(pending->lock, WAIT_LOCK);
PyMutex_Lock(&pending->mutex);
if (pending->busy) {
/* A pending call was added after another thread was already
handling the pending calls (and had already "unsignaled").
@ -807,11 +802,11 @@ make_pending_calls(PyInterpreterState *interp)
care of any remaining pending calls. Until then, though,
all the interpreter's threads will be tripping the eval
breaker every time it's checked. */
PyThread_release_lock(pending->lock);
PyMutex_Unlock(&pending->mutex);
return 0;
}
pending->busy = 1;
PyThread_release_lock(pending->lock);
PyMutex_Unlock(&pending->mutex);
/* unsignal before starting to call callbacks, so that any callback
added in-between re-signals */
@ -892,23 +887,9 @@ Py_MakePendingCalls(void)
}
/* Initialize the eval-related state of `interp`: calls _gil_initialize()
   on interp->_gil.

   NOTE(review): this diff hunk carries both signatures of the function.
   The two-argument form additionally stores the caller-supplied lock in
   interp->ceval.pending.lock, after asserting that the slot is still NULL.
   The one-argument form receives no lock; presumably the PyMutex that
   replaces it is usable in its zero-initialized state and needs no setup
   here -- confirm against the struct definition. */
void
_PyEval_InitState(PyInterpreterState *interp, PyThread_type_lock pending_lock)
_PyEval_InitState(PyInterpreterState *interp)
{
_gil_initialize(&interp->_gil);
struct _pending_calls *pending = &interp->ceval.pending;
assert(pending->lock == NULL);
pending->lock = pending_lock;
}
/* Free the pending-calls lock held in `ceval`, if there is one, and reset
   the pointer to NULL so that calling this again does nothing.

   NOTE(review): the function only manages the heap-allocated
   PyThread_type_lock; it looks like this commit drops it together with
   its call in PyInterpreterState_Delete -- confirm against the full diff. */
void
_PyEval_FiniState(struct _ceval_state *ceval)
{
struct _pending_calls *pending = &ceval->pending;
if (pending->lock != NULL) {
PyThread_free_lock(pending->lock);
pending->lock = NULL;
}
}

View file

@ -454,16 +454,16 @@ _xidregistry_clear(struct _xidregistry *xidregistry)
/* Acquire the registry's mutex, when the registry uses one.

   NOTE(review): this hunk interleaves two forms of the guard. The
   PyThread_type_lock form tests the lock pointer against NULL before
   acquiring it; the PyMutex form has no pointer to test and keys on
   registry->global instead, so only the global registry is locked. */
static void
_xidregistry_lock(struct _xidregistry *registry)
{
if (registry->mutex != NULL) {
PyThread_acquire_lock(registry->mutex, WAIT_LOCK);
if (registry->global) {
PyMutex_Lock(&registry->mutex);
}
}
/* Release the registry's mutex, when the registry uses one. Uses the same
   guard as _xidregistry_lock() so that acquire and release stay paired.

   NOTE(review): this hunk interleaves the PyThread_type_lock form (NULL
   check on the lock pointer) with the PyMutex form (check of
   registry->global). */
static void
_xidregistry_unlock(struct _xidregistry *registry)
{
if (registry->mutex != NULL) {
PyThread_release_lock(registry->mutex);
if (registry->global) {
PyMutex_Unlock(&registry->mutex);
}
}
@ -856,19 +856,10 @@ _xidregistry_init(struct _xidregistry *registry)
registry->initialized = 1;
if (registry->global) {
// We manage the mutex lifecycle in pystate.c.
assert(registry->mutex != NULL);
// Registering the builtins is cheap so we don't bother doing it lazily.
assert(registry->head == NULL);
_register_builtins_for_crossinterpreter_data(registry);
}
else {
// Within an interpreter we rely on the GIL instead of a separate lock.
assert(registry->mutex == NULL);
// There's nothing else to initialize.
}
}
static void
@ -880,17 +871,6 @@ _xidregistry_fini(struct _xidregistry *registry)
registry->initialized = 0;
_xidregistry_clear(registry);
if (registry->global) {
// We manage the mutex lifecycle in pystate.c.
assert(registry->mutex != NULL);
}
else {
// There's nothing else to finalize.
// Within an interpreter we rely on the GIL instead of a separate lock.
assert(registry->mutex == NULL);
}
}

View file

@ -415,11 +415,7 @@ remove_module(PyThreadState *tstate, PyObject *name)
/* Bump LAST_MODULE_INDEX by one and return the new value.

   NOTE(review): this hunk shows two implementations. The first increments
   and reads the counter while holding EXTENSIONS.mutex. The second uses a
   single atomic add; the trailing "+ 1" assumes _Py_atomic_add_ssize()
   returns the value from before the addition -- TODO confirm. */
Py_ssize_t
_PyImport_GetNextModuleIndex(void)
{
PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK);
LAST_MODULE_INDEX++;
Py_ssize_t index = LAST_MODULE_INDEX;
PyThread_release_lock(EXTENSIONS.mutex);
return index;
return _Py_atomic_add_ssize(&LAST_MODULE_INDEX, 1) + 1;
}
static const char *
@ -879,13 +875,13 @@ gets even messier.
/* Acquire the lock stored at _PyRuntime.imports.extensions.mutex, blocking
   until it is available.

   NOTE(review): the hunk lists both the PyThread_acquire_lock() call and
   the PyMutex_Lock() call that replaces it; the PyMutex version passes the
   address of the mutex field rather than a lock handle. */
static inline void
extensions_lock_acquire(void)
{
PyThread_acquire_lock(_PyRuntime.imports.extensions.mutex, WAIT_LOCK);
PyMutex_Lock(&_PyRuntime.imports.extensions.mutex);
}
/* Release the lock stored at _PyRuntime.imports.extensions.mutex.

   NOTE(review): the hunk lists both the PyThread_release_lock() call and
   the PyMutex_Unlock() call that replaces it. */
static inline void
extensions_lock_release(void)
{
PyThread_release_lock(_PyRuntime.imports.extensions.mutex);
PyMutex_Unlock(&_PyRuntime.imports.extensions.mutex);
}
/* Magic for extension modules (built-in as well as dynamically

View file

@ -3055,13 +3055,13 @@ wait_for_thread_shutdown(PyThreadState *tstate)
/* Append `func` to the callback table in _PyRuntime.atexit.

   Returns 0 on success, or -1 without storing anything when the table
   already holds NEXITFUNCS entries. The count check and the append both
   happen while state->mutex is held, and the mutex is released on every
   return path.

   NOTE(review): each locking step appears twice because the hunk shows
   both the PyThread_*_lock() call and the PyMutex_*() call replacing it. */
int Py_AtExit(void (*func)(void))
{
struct _atexit_runtime_state *state = &_PyRuntime.atexit;
PyThread_acquire_lock(state->mutex, WAIT_LOCK);
PyMutex_Lock(&state->mutex);
if (state->ncallbacks >= NEXITFUNCS) {
PyThread_release_lock(state->mutex);
PyMutex_Unlock(&state->mutex);
return -1;
}
state->callbacks[state->ncallbacks++] = func;
PyThread_release_lock(state->mutex);
PyMutex_Unlock(&state->mutex);
return 0;
}
@ -3071,18 +3071,18 @@ call_ll_exitfuncs(_PyRuntimeState *runtime)
atexit_callbackfunc exitfunc;
struct _atexit_runtime_state *state = &runtime->atexit;
PyThread_acquire_lock(state->mutex, WAIT_LOCK);
PyMutex_Lock(&state->mutex);
while (state->ncallbacks > 0) {
/* pop last function from the list */
state->ncallbacks--;
exitfunc = state->callbacks[state->ncallbacks];
state->callbacks[state->ncallbacks] = NULL;
PyThread_release_lock(state->mutex);
PyMutex_Unlock(&state->mutex);
exitfunc();
PyThread_acquire_lock(state->mutex, WAIT_LOCK);
PyMutex_Lock(&state->mutex);
}
PyThread_release_lock(state->mutex);
PyMutex_Unlock(&state->mutex);
fflush(stdout);
fflush(stderr);

View file

@ -379,49 +379,11 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS
static const _PyRuntimeState initial = _PyRuntimeState_INIT(_PyRuntime);
_Py_COMP_DIAG_POP
#define NUMLOCKS 8
#define LOCKS_INIT(runtime) \
{ \
&(runtime)->interpreters.mutex, \
&(runtime)->xi.registry.mutex, \
&(runtime)->unicode_state.ids.lock, \
&(runtime)->imports.extensions.mutex, \
&(runtime)->ceval.pending_mainthread.lock, \
&(runtime)->atexit.mutex, \
&(runtime)->audit_hooks.mutex, \
&(runtime)->allocators.mutex, \
}
/* Allocate NUMLOCKS locks into `locks`, using the default raw allocator.

   If an allocation fails, the locks obtained so far are freed, their
   slots are set to NULL, and the loop stops; slots from the failing index
   onward are never written.

   NOTE(review): the function returns 0 on that failure path as well, so
   the `alloc_for_runtime(locks) != 0` check in _PyRuntimeState_Init()
   can never report the failure. */
static int
alloc_for_runtime(PyThread_type_lock locks[NUMLOCKS])
{
/* Force default allocator, since _PyRuntimeState_Fini() must
use the same allocator as this function. */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
for (int i = 0; i < NUMLOCKS; i++) {
PyThread_type_lock lock = PyThread_allocate_lock();
if (lock == NULL) {
/* Roll back: release every lock allocated before the failure. */
for (int j = 0; j < i; j++) {
PyThread_free_lock(locks[j]);
locks[j] = NULL;
}
break;
}
locks[i] = lock;
}
/* Restore whichever raw allocator was active on entry. */
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
return 0;
}
static void
init_runtime(_PyRuntimeState *runtime,
void *open_code_hook, void *open_code_userdata,
_Py_AuditHookEntry *audit_hook_head,
Py_ssize_t unicode_next_index,
PyThread_type_lock locks[NUMLOCKS])
Py_ssize_t unicode_next_index)
{
assert(!runtime->preinitializing);
assert(!runtime->preinitialized);
@ -435,12 +397,6 @@ init_runtime(_PyRuntimeState *runtime,
PyPreConfig_InitPythonConfig(&runtime->preconfig);
PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime);
for (int i = 0; i < NUMLOCKS; i++) {
assert(locks[i] != NULL);
*lockptrs[i] = locks[i];
}
// Set it to the ID of the main thread of the main interpreter.
runtime->main_thread = PyThread_get_thread_ident();
@ -466,11 +422,6 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
// is called multiple times.
Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index;
PyThread_type_lock locks[NUMLOCKS];
if (alloc_for_runtime(locks) != 0) {
return _PyStatus_NO_MEMORY();
}
if (runtime->_initialized) {
// Py_Initialize() must be running again.
// Reset to _PyRuntimeState_INIT.
@ -489,7 +440,7 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
}
init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
unicode_next_index, locks);
unicode_next_index);
return _PyStatus_OK();
}
@ -509,23 +460,6 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)
if (PyThread_tss_is_created(&runtime->trashTSSkey)) {
PyThread_tss_delete(&runtime->trashTSSkey);
}
/* Force the allocator used by _PyRuntimeState_Init(). */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
#define FREE_LOCK(LOCK) \
if (LOCK != NULL) { \
PyThread_free_lock(LOCK); \
LOCK = NULL; \
}
PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime);
for (int i = 0; i < NUMLOCKS; i++) {
FREE_LOCK(*lockptrs[i]);
}
#undef FREE_LOCK
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
}
#ifdef HAVE_FORK
@ -537,28 +471,23 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
// This was initially set in _PyRuntimeState_Init().
runtime->main_thread = PyThread_get_thread_ident();
/* Force default allocator, since _PyRuntimeState_Fini() must
use the same allocator as this function. */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime);
int reinit_err = 0;
for (int i = 0; i < NUMLOCKS; i++) {
reinit_err += _PyThread_at_fork_reinit(lockptrs[i]);
}
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
// Clears the parking lot. Any waiting threads are dead. This must be
// called before releasing any locks that use the parking lot.
_PyParkingLot_AfterFork();
// Re-initialize global locks
runtime->interpreters.mutex = (PyMutex){0};
runtime->xi.registry.mutex = (PyMutex){0};
runtime->unicode_state.ids.mutex = (PyMutex){0};
runtime->imports.extensions.mutex = (PyMutex){0};
runtime->ceval.pending_mainthread.mutex = (PyMutex){0};
runtime->atexit.mutex = (PyMutex){0};
runtime->audit_hooks.mutex = (PyMutex){0};
runtime->allocators.mutex = (PyMutex){0};
/* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does
* not force the default allocator. */
reinit_err += _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex);
if (reinit_err < 0) {
if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) {
return _PyStatus_ERR("Failed to reinitialize runtime locks");
}
@ -594,24 +523,6 @@ _PyInterpreterState_Enable(_PyRuntimeState *runtime)
{
struct pyinterpreters *interpreters = &runtime->interpreters;
interpreters->next_id = 0;
/* Py_Finalize() calls _PyRuntimeState_Fini() which clears the mutex.
Create a new mutex if needed. */
if (interpreters->mutex == NULL) {
/* Force default allocator, since _PyRuntimeState_Fini() must
use the same allocator as this function. */
PyMemAllocatorEx old_alloc;
_PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
interpreters->mutex = PyThread_allocate_lock();
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
if (interpreters->mutex == NULL) {
return _PyStatus_ERR("Can't initialize threads for interpreter");
}
}
return _PyStatus_OK();
}
@ -654,8 +565,7 @@ free_interpreter(PyInterpreterState *interp)
static PyStatus
init_interpreter(PyInterpreterState *interp,
_PyRuntimeState *runtime, int64_t id,
PyInterpreterState *next,
PyThread_type_lock pending_lock)
PyInterpreterState *next)
{
if (interp->_initialized) {
return _PyStatus_ERR("interpreter already initialized");
@ -684,7 +594,7 @@ init_interpreter(PyInterpreterState *interp,
return status;
}
_PyEval_InitState(interp, pending_lock);
_PyEval_InitState(interp);
_PyGC_InitState(&interp->gc);
PyConfig_InitPythonConfig(&interp->config);
_PyType_InitCache(interp);
@ -730,11 +640,6 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp)
}
}
PyThread_type_lock pending_lock = PyThread_allocate_lock();
if (pending_lock == NULL) {
return _PyStatus_NO_MEMORY();
}
/* We completely serialize creation of multiple interpreters, since
it simplifies things here and blocking concurrent calls isn't a problem.
Regardless, we must fully block subinterpreter creation until
@ -781,11 +686,10 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp)
interpreters->head = interp;
status = init_interpreter(interp, runtime,
id, old_head, pending_lock);
id, old_head);
if (_PyStatus_EXCEPTION(status)) {
goto error;
}
pending_lock = NULL;
HEAD_UNLOCK(runtime);
@ -796,9 +700,6 @@ _PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp)
error:
HEAD_UNLOCK(runtime);
if (pending_lock != NULL) {
PyThread_free_lock(pending_lock);
}
if (interp != NULL) {
free_interpreter(interp);
}
@ -1003,8 +904,6 @@ PyInterpreterState_Delete(PyInterpreterState *interp)
zapthreads(interp);
_PyEval_FiniState(&interp->ceval);
// XXX These two calls should be done at the end of clear_interpreter(),
// but currently some objects get decref'ed after that.
#ifdef Py_REF_DEBUG

View file

@ -451,15 +451,9 @@ PySys_AddAuditHook(Py_AuditHookFunction hook, void *userData)
e->hookCFunction = (Py_AuditHookFunction)hook;
e->userData = userData;
if (runtime->audit_hooks.mutex == NULL) {
/* The runtime must not be initialized yet. */
add_audit_hook_entry_unlocked(runtime, e);
}
else {
PyThread_acquire_lock(runtime->audit_hooks.mutex, WAIT_LOCK);
add_audit_hook_entry_unlocked(runtime, e);
PyThread_release_lock(runtime->audit_hooks.mutex);
}
PyMutex_Lock(&runtime->audit_hooks.mutex);
add_audit_hook_entry_unlocked(runtime, e);
PyMutex_Unlock(&runtime->audit_hooks.mutex);
return 0;
}