From daa159f98b3689ae6a587bfb978b931762f9dbc9 Mon Sep 17 00:00:00 2001 From: Daniele Parmeggiani <8658291+dpdani@users.noreply.github.com> Date: Mon, 23 Mar 2026 21:55:06 +0100 Subject: [PATCH] gh-135871: Reload lock internal state while spinning in `PyMutex_LockTimed` (gh-146064) Add atomic loads in the slow path of PyMutex to increase the number of lock acquisitions per second that threads can make on a shared mutex. --- ...2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst | 1 + Python/lock.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst new file mode 100644 index 00000000000..29103e46906 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-03-19-16-16-40.gh-issue-135871.jSExZ3.rst @@ -0,0 +1 @@ +Improve multithreaded scaling of PyMutex in low-contention scenarios by reloading the lock's internal state, without slowing down high-contention scenarios. diff --git a/Python/lock.c b/Python/lock.c index ad97bfd93c8..752a5899e08 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -27,8 +27,10 @@ static const PyTime_t TIME_TO_BE_FAIR_NS = 1000*1000; // enabled. #if Py_GIL_DISABLED static const int MAX_SPIN_COUNT = 40; +static const int RELOAD_SPIN_MASK = 3; #else static const int MAX_SPIN_COUNT = 0; +static const int RELOAD_SPIN_MASK = 1; #endif struct mutex_entry { @@ -79,6 +81,16 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) }; Py_ssize_t spin_count = 0; +#ifdef Py_GIL_DISABLED + // Using thread-id as a way of reducing contention further in the reload below. + // It adds a pseudo-random starting offset to the recurrence, so that threads + // are less likely to try and run compare-exchange at the same time. + // The lower bits of platform thread ids are likely to not be random, + // hence the right shift. + const Py_ssize_t tid = (Py_ssize_t)(_Py_ThreadId() >> 12); +#else + const Py_ssize_t tid = 0; +#endif for (;;) { if ((v & _Py_LOCKED) == 0) { // The lock is unlocked. Try to grab it. @@ -92,6 +104,9 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) // Spin for a bit. _Py_yield(); spin_count++; + if (((spin_count + tid) & RELOAD_SPIN_MASK) == 0) { + v = _Py_atomic_load_uint8_relaxed(&m->_bits); + } continue; }