mirror of
https://github.com/python/cpython.git
synced 2026-04-26 22:01:08 +00:00
gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886)
Avoid racing with the owning thread's refcount operations when immortalizing an interned string: if we don't own it and its refcount isn't merged, intern a copy we own instead. Use atomic stores in _Py_SetImmortalUntracked so concurrent atomic reads are race-free.
This commit is contained in:
parent
42d645a7e8
commit
4629c2215a
4 changed files with 77 additions and 11 deletions
|
|
@ -1,7 +1,9 @@
|
|||
import sys
|
||||
import threading
|
||||
import unittest
|
||||
|
||||
from itertools import cycle
|
||||
from threading import Event, Thread
|
||||
from threading import Barrier, Event, Thread
|
||||
from unittest import TestCase
|
||||
|
||||
from test.support import threading_helper
|
||||
|
|
@ -69,6 +71,24 @@ def reader_func():
|
|||
for reader in readers:
|
||||
reader.join()
|
||||
|
||||
def test_intern_unowned_string(self):
|
||||
# Test interning strings owned by various threads.
|
||||
strings = [f"intern_race_owner_{i}" for i in range(50)]
|
||||
|
||||
NUM_THREADS = 5
|
||||
b = Barrier(NUM_THREADS)
|
||||
|
||||
def interner():
|
||||
tid = threading.get_ident()
|
||||
for i in range(20):
|
||||
strings.append(f"intern_{tid}_{i}")
|
||||
b.wait()
|
||||
for s in strings:
|
||||
r = sys.intern(s)
|
||||
self.assertTrue(sys._is_interned(r))
|
||||
|
||||
threading_helper.run_concurrently(interner, nthreads=NUM_THREADS)
|
||||
|
||||
def test_maketrans_dict_concurrent_modification(self):
|
||||
for _ in range(5):
|
||||
d = {2000: 'a'}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,4 @@
|
|||
Fix a data race in :func:`sys.intern` in the free-threaded build when
|
||||
interning a string owned by another thread. An interned copy owned by the
|
||||
current thread is used instead when it is not safe to immortalize the
|
||||
original.
|
||||
|
|
@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op)
|
|||
return;
|
||||
}
|
||||
#ifdef Py_GIL_DISABLED
|
||||
op->ob_tid = _Py_UNOWNED_TID;
|
||||
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
|
||||
op->ob_ref_shared = 0;
|
||||
_Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID);
|
||||
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL);
|
||||
_Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0);
|
||||
_Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED);
|
||||
#elif SIZEOF_VOID_P > 4
|
||||
op->ob_flags = _Py_IMMORTAL_FLAGS;
|
||||
|
|
|
|||
|
|
@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||
{
|
||||
#define CHECK(expr) \
|
||||
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
|
||||
#ifdef Py_GIL_DISABLED
|
||||
# define CHECK_IF_GIL(expr) (void)(expr)
|
||||
# define CHECK_IF_FT(expr) CHECK(expr)
|
||||
#else
|
||||
# define CHECK_IF_GIL(expr) CHECK(expr)
|
||||
# define CHECK_IF_FT(expr) (void)(expr)
|
||||
#endif
|
||||
|
||||
|
||||
assert(op != NULL);
|
||||
CHECK(PyUnicode_Check(op));
|
||||
|
|
@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||
|
||||
/* Check interning state */
|
||||
#ifdef Py_DEBUG
|
||||
// Note that we do not check `_Py_IsImmortal(op)`, since stable ABI
|
||||
// extensions can make immortal strings mortal (but with a high enough
|
||||
// refcount).
|
||||
// The other way is extremely unlikely (worth a potential failed assertion
|
||||
// in a debug build), so we do check `!_Py_IsImmortal(op)`.
|
||||
// Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build
|
||||
// since stable ABI extensions can make immortal strings mortal (but with a
|
||||
// high enough refcount).
|
||||
switch (PyUnicode_CHECK_INTERNED(op)) {
|
||||
case SSTATE_NOT_INTERNED:
|
||||
if (ascii->state.statically_allocated) {
|
||||
|
|
@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||
// are static but use SSTATE_NOT_INTERNED
|
||||
}
|
||||
else {
|
||||
CHECK(!_Py_IsImmortal(op));
|
||||
CHECK_IF_GIL(!_Py_IsImmortal(op));
|
||||
}
|
||||
break;
|
||||
case SSTATE_INTERNED_MORTAL:
|
||||
CHECK(!ascii->state.statically_allocated);
|
||||
CHECK(!_Py_IsImmortal(op));
|
||||
CHECK_IF_GIL(!_Py_IsImmortal(op));
|
||||
break;
|
||||
case SSTATE_INTERNED_IMMORTAL:
|
||||
CHECK(!ascii->state.statically_allocated);
|
||||
CHECK_IF_FT(_Py_IsImmortal(op));
|
||||
break;
|
||||
case SSTATE_INTERNED_IMMORTAL_STATIC:
|
||||
CHECK(ascii->state.statically_allocated);
|
||||
CHECK_IF_FT(_Py_IsImmortal(op));
|
||||
break;
|
||||
default:
|
||||
Py_UNREACHABLE();
|
||||
|
|
@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s)
|
|||
FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
static bool
|
||||
can_immortalize_safely(PyObject *s)
|
||||
{
|
||||
if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) {
|
||||
return true;
|
||||
}
|
||||
Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared);
|
||||
return _Py_REF_IS_MERGED(shared);
|
||||
}
|
||||
#endif
|
||||
|
||||
static /* non-null */ PyObject*
|
||||
intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
|
||||
bool immortalize)
|
||||
|
|
@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
|
|||
// no, go on
|
||||
break;
|
||||
case SSTATE_INTERNED_MORTAL:
|
||||
#ifndef Py_GIL_DISABLED
|
||||
// yes but we might need to make it immortal
|
||||
if (immortalize) {
|
||||
immortalize_interned(s);
|
||||
}
|
||||
return s;
|
||||
#else
|
||||
// not fully interned yet; fall through to the locking path
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
// all done
|
||||
return s;
|
||||
|
|
@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
|
|||
Py_DECREF(r);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// Immortalization writes to the refcount fields non-atomically. That
|
||||
// races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we
|
||||
// don't own it (and its refcount hasn't been merged), intern a copy
|
||||
// we own instead.
|
||||
if (!can_immortalize_safely(s)) {
|
||||
PyObject *copy = _PyUnicode_Copy(s);
|
||||
if (copy == NULL) {
|
||||
PyErr_Clear();
|
||||
return s;
|
||||
}
|
||||
Py_DECREF(s);
|
||||
s = copy;
|
||||
}
|
||||
#endif
|
||||
|
||||
FT_MUTEX_LOCK(INTERN_MUTEX);
|
||||
PyObject *t;
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue