mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	[3.14] gh-137433: Fix deadlock with stop-the-world and daemon threads (gh-137735) (GH-138965)
There was a deadlock originally seen by Memray when a daemon thread
enabled or disabled profiling while the interpreter was shutting down.
I think this could also happen with garbage collection, but I haven't
seen that in practice.
The daemon thread could be hung while trying to acquire the global rwmutex
that prevents overlapping global and per-interpreter stop-the-world events.
Since it already held the main interpreter's stop-the-world lock, it
also deadlocked the main thread, which is trying to perform interpreter
finalization.
Swap the order of lock acquisition to prevent this deadlock.
Additionally, refactor `_PyParkingLot_Park` so that the global buckets
hashtable is left in a clean state if the thread is hung in
`PyEval_AcquireThread`.
(cherry picked from commit 90fe3250f8)
Co-authored-by: Sam Gross <colesbury@gmail.com>
			
			
This commit is contained in:
		
							parent
							
								
									db8b943259
								
							
						
					
					
						commit
						e09f33e5bf
					
				
					 4 changed files with 51 additions and 4 deletions
				
			
		|  | @ -1381,6 +1381,33 @@ def test_native_id_after_fork(self): | |||
|         self.assertEqual(len(native_ids), 2) | ||||
|         self.assertNotEqual(native_ids[0], native_ids[1]) | ||||
| 
 | ||||
|     def test_stop_the_world_during_finalization(self): | ||||
|         # gh-137433: Test functions that trigger a stop-the-world in the free | ||||
|         # threading build concurrent with interpreter finalization. | ||||
|         script = """if True: | ||||
|             import gc | ||||
|             import sys | ||||
|             import threading | ||||
|             NUM_THREADS = 5 | ||||
|             b = threading.Barrier(NUM_THREADS + 1) | ||||
|             def run_in_bg(): | ||||
|                 b.wait() | ||||
|                 while True: | ||||
|                     sys.setprofile(None) | ||||
|                     gc.collect() | ||||
| 
 | ||||
|             for _ in range(NUM_THREADS): | ||||
|                 t = threading.Thread(target=run_in_bg, daemon=True) | ||||
|                 t.start() | ||||
| 
 | ||||
|             b.wait() | ||||
|             print("Exiting...") | ||||
|         """ | ||||
|         rc, out, err = assert_python_ok('-c', script) | ||||
|         self.assertEqual(rc, 0) | ||||
|         self.assertEqual(err, b"") | ||||
|         self.assertEqual(out.strip(), b"Exiting...") | ||||
| 
 | ||||
| class ThreadJoinOnShutdown(BaseTestCase): | ||||
| 
 | ||||
|     def _run_and_join(self, script): | ||||
|  |  | |||
|  | @ -0,0 +1,3 @@ | |||
| Fix a potential deadlock in the :term:`free threading` build when daemon | ||||
| threads enable or disable profiling or tracing while the main thread is | ||||
| shutting down the interpreter. | ||||
|  | @ -342,7 +342,19 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size, | |||
|     enqueue(bucket, addr, &wait); | ||||
|     _PyRawMutex_Unlock(&bucket->mutex); | ||||
| 
 | ||||
|     int res = _PySemaphore_Wait(&wait.sema, timeout_ns, detach); | ||||
|     PyThreadState *tstate = NULL; | ||||
|     if (detach) { | ||||
|         tstate = _PyThreadState_GET(); | ||||
|         if (tstate && _PyThreadState_IsAttached(tstate)) { | ||||
|             // Only detach if we are attached
 | ||||
|             PyEval_ReleaseThread(tstate); | ||||
|         } | ||||
|         else { | ||||
|             tstate = NULL; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     int res = _PySemaphore_Wait(&wait.sema, timeout_ns, 0); | ||||
|     if (res == Py_PARK_OK) { | ||||
|         goto done; | ||||
|     } | ||||
|  | @ -354,7 +366,7 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size, | |||
|         // Another thread has started to unpark us. Wait until we process the
 | ||||
|         // wakeup signal.
 | ||||
|         do { | ||||
|             res = _PySemaphore_Wait(&wait.sema, -1, detach); | ||||
|             res = _PySemaphore_Wait(&wait.sema, -1, 0); | ||||
|         } while (res != Py_PARK_OK); | ||||
|         goto done; | ||||
|     } | ||||
|  | @ -366,6 +378,9 @@ _PyParkingLot_Park(const void *addr, const void *expected, size_t size, | |||
| 
 | ||||
| done: | ||||
|     _PySemaphore_Destroy(&wait.sema); | ||||
|     if (tstate) { | ||||
|         PyEval_AcquireThread(tstate); | ||||
|     } | ||||
|     return res; | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -2345,13 +2345,15 @@ stop_the_world(struct _stoptheworld_state *stw) | |||
| { | ||||
|     _PyRuntimeState *runtime = &_PyRuntime; | ||||
| 
 | ||||
|     PyMutex_Lock(&stw->mutex); | ||||
|     // gh-137433: Acquire the rwmutex first to avoid deadlocks with daemon
 | ||||
|     // threads that may hang when blocked on lock acquisition.
 | ||||
|     if (stw->is_global) { | ||||
|         _PyRWMutex_Lock(&runtime->stoptheworld_mutex); | ||||
|     } | ||||
|     else { | ||||
|         _PyRWMutex_RLock(&runtime->stoptheworld_mutex); | ||||
|     } | ||||
|     PyMutex_Lock(&stw->mutex); | ||||
| 
 | ||||
|     HEAD_LOCK(runtime); | ||||
|     stw->requested = 1; | ||||
|  | @ -2417,13 +2419,13 @@ start_the_world(struct _stoptheworld_state *stw) | |||
|     } | ||||
|     stw->requester = NULL; | ||||
|     HEAD_UNLOCK(runtime); | ||||
|     PyMutex_Unlock(&stw->mutex); | ||||
|     if (stw->is_global) { | ||||
|         _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); | ||||
|     } | ||||
|     else { | ||||
|         _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex); | ||||
|     } | ||||
|     PyMutex_Unlock(&stw->mutex); | ||||
| } | ||||
| #endif  // Py_GIL_DISABLED
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Miss Islington (bot)
						Miss Islington (bot)