mirror of
https://github.com/python/cpython.git
synced 2025-10-31 21:51:50 +00:00
[3.14] gh-137433: Fix deadlock with stop-the-world and daemon threads (gh-137735) (GH-138965)
There was a deadlock originally seen by Memray when a daemon thread
enabled or disabled profiling while the interpreter was shutting down.
I think this could also happen with garbage collection, but I haven't
seen that in practice.
The daemon thread could be hung while trying acquire the global rwmutex
that prevents overlapping global and per-interpreter stop-the-world events.
Since it already held the main interpreter's stop-the-world lock, it
also deadlocked the main thread, which is trying to perform interpreter
finalization.
Swap the order of lock acquisition to prevent this deadlock.
Additionally, refactor `_PyParkingLot_Park` so that the global buckets
hashtable is left in a clean state if the thread is hung in
`PyEval_AcquireThread`.
(cherry picked from commit 90fe3250f8)
Co-authored-by: Sam Gross <colesbury@gmail.com>
This commit is contained in:
parent
db8b943259
commit
e09f33e5bf
4 changed files with 51 additions and 4 deletions
|
|
@ -1381,6 +1381,33 @@ def test_native_id_after_fork(self):
|
|||
self.assertEqual(len(native_ids), 2)
|
||||
self.assertNotEqual(native_ids[0], native_ids[1])
|
||||
|
||||
def test_stop_the_world_during_finalization(self):
|
||||
# gh-137433: Test functions that trigger a stop-the-world in the free
|
||||
# threading build concurrent with interpreter finalization.
|
||||
script = """if True:
|
||||
import gc
|
||||
import sys
|
||||
import threading
|
||||
NUM_THREADS = 5
|
||||
b = threading.Barrier(NUM_THREADS + 1)
|
||||
def run_in_bg():
|
||||
b.wait()
|
||||
while True:
|
||||
sys.setprofile(None)
|
||||
gc.collect()
|
||||
|
||||
for _ in range(NUM_THREADS):
|
||||
t = threading.Thread(target=run_in_bg, daemon=True)
|
||||
t.start()
|
||||
|
||||
b.wait()
|
||||
print("Exiting...")
|
||||
"""
|
||||
rc, out, err = assert_python_ok('-c', script)
|
||||
self.assertEqual(rc, 0)
|
||||
self.assertEqual(err, b"")
|
||||
self.assertEqual(out.strip(), b"Exiting...")
|
||||
|
||||
class ThreadJoinOnShutdown(BaseTestCase):
|
||||
|
||||
def _run_and_join(self, script):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
Fix a potential deadlock in the :term:`free threading` build when daemon
|
||||
threads enable or disable profiling or tracing while the main thread is
|
||||
shutting down the interpreter.
|
||||
|
|
@ -342,7 +342,19 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
|||
enqueue(bucket, addr, &wait);
|
||||
_PyRawMutex_Unlock(&bucket->mutex);
|
||||
|
||||
int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach);
|
||||
PyThreadState *tstate = NULL;
|
||||
if (detach) {
|
||||
tstate = _PyThreadState_GET();
|
||||
if (tstate && _PyThreadState_IsAttached(tstate)) {
|
||||
// Only detach if we are attached
|
||||
PyEval_ReleaseThread(tstate);
|
||||
}
|
||||
else {
|
||||
tstate = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int res = _PySemaphore_Wait(&wait.sema, timeout_ns, 0);
|
||||
if (res == Py_PARK_OK) {
|
||||
goto done;
|
||||
}
|
||||
|
|
@ -354,7 +366,7 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
|||
// Another thread has started to unpark us. Wait until we process the
|
||||
// wakeup signal.
|
||||
do {
|
||||
res = _PySemaphore_Wait(&wait.sema, -1, detach);
|
||||
res = _PySemaphore_Wait(&wait.sema, -1, 0);
|
||||
} while (res != Py_PARK_OK);
|
||||
goto done;
|
||||
}
|
||||
|
|
@ -366,6 +378,9 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size,
|
|||
|
||||
done:
|
||||
_PySemaphore_Destroy(&wait.sema);
|
||||
if (tstate) {
|
||||
PyEval_AcquireThread(tstate);
|
||||
}
|
||||
return res;
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2345,13 +2345,15 @@ stop_the_world(struct _stoptheworld_state *stw)
|
|||
{
|
||||
_PyRuntimeState *runtime = &_PyRuntime;
|
||||
|
||||
PyMutex_Lock(&stw->mutex);
|
||||
// gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon
|
||||
// threads that may hang when blocked on lock acquisition.
|
||||
if (stw->is_global) {
|
||||
_PyRWMutex_Lock(&runtime->stoptheworld_mutex);
|
||||
}
|
||||
else {
|
||||
_PyRWMutex_RLock(&runtime->stoptheworld_mutex);
|
||||
}
|
||||
PyMutex_Lock(&stw->mutex);
|
||||
|
||||
HEAD_LOCK(runtime);
|
||||
stw->requested = 1;
|
||||
|
|
@ -2417,13 +2419,13 @@ start_the_world(struct _stoptheworld_state *stw)
|
|||
}
|
||||
stw->requester = NULL;
|
||||
HEAD_UNLOCK(runtime);
|
||||
PyMutex_Unlock(&stw->mutex);
|
||||
if (stw->is_global) {
|
||||
_PyRWMutex_Unlock(&runtime->stoptheworld_mutex);
|
||||
}
|
||||
else {
|
||||
_PyRWMutex_RUnlock(&runtime->stoptheworld_mutex);
|
||||
}
|
||||
PyMutex_Unlock(&stw->mutex);
|
||||
}
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue