mirror of
https://github.com/python/cpython.git
synced 2025-12-31 04:23:37 +00:00
gh-137433: Fix deadlock with stop-the-world and daemon threads (gh-137735)
There was a deadlock originally seen by Memray when a daemon thread enabled or disabled profiling while the interpreter was shutting down. I think this could also happen with garbage collection, but I haven't seen that in practice. The daemon thread could be hung while trying acquire the global rwmutex that prevents overlapping global and per-interpreter stop-the-world events. Since it already held the main interpreter's stop-the-world lock, it also deadlocked the main thread, which is trying to perform interpreter finalization. Swap the order of lock acquisition to prevent this deadlock. Additionally, refactor `_PyParkingLot_Park` so that the global buckets hashtable is left in a clean state if the thread is hung in `PyEval_AcquireThread`.
This commit is contained in:
parent
4c0d7bc52a
commit
90fe3250f8
6 changed files with 55 additions and 31 deletions
|
|
@ -46,10 +46,8 @@ typedef struct _PySemaphore {
|
||||||
} _PySemaphore;
|
} _PySemaphore;
|
||||||
|
|
||||||
// Puts the current thread to sleep until _PySemaphore_Wakeup() is called.
|
// Puts the current thread to sleep until _PySemaphore_Wakeup() is called.
|
||||||
// If `detach` is true, then the thread will detach/release the GIL while
|
|
||||||
// sleeping.
|
|
||||||
PyAPI_FUNC(int)
|
PyAPI_FUNC(int)
|
||||||
_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns, int detach);
|
_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns);
|
||||||
|
|
||||||
// Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup()
|
// Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup()
|
||||||
// can be called before _PySemaphore_Wait().
|
// can be called before _PySemaphore_Wait().
|
||||||
|
|
|
||||||
|
|
@ -1430,6 +1430,33 @@ def test_native_id_after_fork(self):
|
||||||
self.assertEqual(len(native_ids), 2)
|
self.assertEqual(len(native_ids), 2)
|
||||||
self.assertNotEqual(native_ids[0], native_ids[1])
|
self.assertNotEqual(native_ids[0], native_ids[1])
|
||||||
|
|
||||||
|
def test_stop_the_world_during_finalization(self):
|
||||||
|
# gh-137433: Test functions that trigger a stop-the-world in the free
|
||||||
|
# threading build concurrent with interpreter finalization.
|
||||||
|
script = """if True:
|
||||||
|
import gc
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
NUM_THREADS = 5
|
||||||
|
b = threading.Barrier(NUM_THREADS + 1)
|
||||||
|
def run_in_bg():
|
||||||
|
b.wait()
|
||||||
|
while True:
|
||||||
|
sys.setprofile(None)
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
for _ in range(NUM_THREADS):
|
||||||
|
t = threading.Thread(target=run_in_bg, daemon=True)
|
||||||
|
t.start()
|
||||||
|
|
||||||
|
b.wait()
|
||||||
|
print("Exiting...")
|
||||||
|
"""
|
||||||
|
rc, out, err = assert_python_ok('-c', script)
|
||||||
|
self.assertEqual(rc, 0)
|
||||||
|
self.assertEqual(err, b"")
|
||||||
|
self.assertEqual(out.strip(), b"Exiting...")
|
||||||
|
|
||||||
class ThreadJoinOnShutdown(BaseTestCase):
|
class ThreadJoinOnShutdown(BaseTestCase):
|
||||||
|
|
||||||
def _run_and_join(self, script):
|
def _run_and_join(self, script):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix a potential deadlock in the :term:`free threading` build when daemon
|
||||||
|
threads enable or disable profiling or tracing while the main thread is
|
||||||
|
shutting down the interpreter.
|
||||||
|
|
@ -227,7 +227,7 @@ _PyRawMutex_LockSlow(_PyRawMutex *m)
|
||||||
|
|
||||||
// Wait for us to be woken up. Note that we still have to lock the
|
// Wait for us to be woken up. Note that we still have to lock the
|
||||||
// mutex ourselves: it is NOT handed off to us.
|
// mutex ourselves: it is NOT handed off to us.
|
||||||
_PySemaphore_Wait(&waiter.sema, -1, /*detach=*/0);
|
_PySemaphore_Wait(&waiter.sema, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
_PySemaphore_Destroy(&waiter.sema);
|
_PySemaphore_Destroy(&waiter.sema);
|
||||||
|
|
|
||||||
|
|
@ -91,8 +91,8 @@ _PySemaphore_Destroy(_PySemaphore *sema)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
int
|
||||||
_PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
|
_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout)
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
#if defined(MS_WINDOWS)
|
#if defined(MS_WINDOWS)
|
||||||
|
|
@ -225,27 +225,6 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout, int detach)
|
|
||||||
{
|
|
||||||
PyThreadState *tstate = NULL;
|
|
||||||
if (detach) {
|
|
||||||
tstate = _PyThreadState_GET();
|
|
||||||
if (tstate && _PyThreadState_IsAttached(tstate)) {
|
|
||||||
// Only detach if we are attached
|
|
||||||
PyEval_ReleaseThread(tstate);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
tstate = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int res = _PySemaphore_PlatformWait(sema, timeout);
|
|
||||||
if (tstate) {
|
|
||||||
PyEval_AcquireThread(tstate);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_PySemaphore_Wakeup(_PySemaphore *sema)
|
_PySemaphore_Wakeup(_PySemaphore *sema)
|
||||||
{
|
{
|
||||||
|
|
@ -342,7 +321,19 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
||||||
enqueue(bucket, addr, &wait);
|
enqueue(bucket, addr, &wait);
|
||||||
_PyRawMutex_Unlock(&bucket->mutex);
|
_PyRawMutex_Unlock(&bucket->mutex);
|
||||||
|
|
||||||
int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach);
|
PyThreadState *tstate = NULL;
|
||||||
|
if (detach) {
|
||||||
|
tstate = _PyThreadState_GET();
|
||||||
|
if (tstate && _PyThreadState_IsAttached(tstate)) {
|
||||||
|
// Only detach if we are attached
|
||||||
|
PyEval_ReleaseThread(tstate);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
tstate = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = _PySemaphore_Wait(&wait.sema, timeout_ns);
|
||||||
if (res == Py_PARK_OK) {
|
if (res == Py_PARK_OK) {
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
@ -354,7 +345,7 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
||||||
// Another thread has started to unpark us. Wait until we process the
|
// Another thread has started to unpark us. Wait until we process the
|
||||||
// wakeup signal.
|
// wakeup signal.
|
||||||
do {
|
do {
|
||||||
res = _PySemaphore_Wait(&wait.sema, -1, detach);
|
res = _PySemaphore_Wait(&wait.sema, -1);
|
||||||
} while (res != Py_PARK_OK);
|
} while (res != Py_PARK_OK);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
@ -366,6 +357,9 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
||||||
|
|
||||||
done:
|
done:
|
||||||
_PySemaphore_Destroy(&wait.sema);
|
_PySemaphore_Destroy(&wait.sema);
|
||||||
|
if (tstate) {
|
||||||
|
PyEval_AcquireThread(tstate);
|
||||||
|
}
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2253,13 +2253,15 @@ stop_the_world(struct _stoptheworld_state *stw)
|
||||||
{
|
{
|
||||||
_PyRuntimeState *runtime = &_PyRuntime;
|
_PyRuntimeState *runtime = &_PyRuntime;
|
||||||
|
|
||||||
PyMutex_Lock(&stw->mutex);
|
// gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon
|
||||||
|
// threads that may hang when blocked on lock acquisition.
|
||||||
if (stw->is_global) {
|
if (stw->is_global) {
|
||||||
_PyRWMutex_Lock(&runtime->stoptheworld_mutex);
|
_PyRWMutex_Lock(&runtime->stoptheworld_mutex);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
_PyRWMutex_RLock(&runtime->stoptheworld_mutex);
|
_PyRWMutex_RLock(&runtime->stoptheworld_mutex);
|
||||||
}
|
}
|
||||||
|
PyMutex_Lock(&stw->mutex);
|
||||||
|
|
||||||
HEAD_LOCK(runtime);
|
HEAD_LOCK(runtime);
|
||||||
stw->requested = 1;
|
stw->requested = 1;
|
||||||
|
|
@ -2325,13 +2327,13 @@ start_the_world(struct _stoptheworld_state *stw)
|
||||||
}
|
}
|
||||||
stw->requester = NULL;
|
stw->requester = NULL;
|
||||||
HEAD_UNLOCK(runtime);
|
HEAD_UNLOCK(runtime);
|
||||||
|
PyMutex_Unlock(&stw->mutex);
|
||||||
if (stw->is_global) {
|
if (stw->is_global) {
|
||||||
_PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
|
_PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
_PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
|
_PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
|
||||||
}
|
}
|
||||||
PyMutex_Unlock(&stw->mutex);
|
|
||||||
}
|
}
|
||||||
#endif // Py_GIL_DISABLED
|
#endif // Py_GIL_DISABLED
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue