gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886)

Avoid racing with the owning thread's refcount operations when
immortalizing an interned string: if we don't own it and its refcount
isn't merged, intern a copy we own instead. Use atomic stores in
_Py_SetImmortalUntracked so concurrent atomic reads are race-free.
This commit is contained in:
Sam Gross 2026-04-23 14:42:57 -04:00 committed by GitHub
parent 42d645a7e8
commit 4629c2215a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 77 additions and 11 deletions

View file

@@ -1,7 +1,9 @@
import sys
import threading
import unittest
from itertools import cycle
from threading import Event, Thread
from threading import Barrier, Event, Thread
from unittest import TestCase
from test.support import threading_helper
@@ -69,6 +71,24 @@ def reader_func():
for reader in readers:
reader.join()
def test_intern_unowned_string(self):
# Test interning strings owned by various threads.
# Regression test for gh-113956: calling sys.intern() on a string that
# is owned by a *different* thread must not race with that thread's
# refcount operations in the free-threaded build.
strings = [f"intern_race_owner_{i}" for i in range(50)]
NUM_THREADS = 5
b = Barrier(NUM_THREADS)
def interner():
# Each thread first publishes 20 strings that it owns into the
# shared list, so that most strings each thread later interns were
# created (and are owned) by some other thread.
tid = threading.get_ident()
for i in range(20):
strings.append(f"intern_{tid}_{i}")
# All appends happen before the barrier releases, so the iteration
# below sees a stable list — no concurrent mutation during reads.
b.wait()
for s in strings:
r = sys.intern(s)
# Whatever object intern() returns must be recorded as interned.
self.assertTrue(sys._is_interned(r))
threading_helper.run_concurrently(interner, nthreads=NUM_THREADS)
def test_maketrans_dict_concurrent_modification(self):
for _ in range(5):
d = {2000: 'a'}

View file

@@ -0,0 +1,4 @@
Fix a data race in :func:`sys.intern` in the free-threaded build when
interning a string owned by another thread. An interned copy owned by the
current thread is used instead when it is not safe to immortalize the
original.

View file

@@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op)
return;
}
#ifdef Py_GIL_DISABLED
op->ob_tid = _Py_UNOWNED_TID;
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
op->ob_ref_shared = 0;
_Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID);
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL);
_Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0);
_Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED);
#elif SIZEOF_VOID_P > 4
op->ob_flags = _Py_IMMORTAL_FLAGS;

View file

@@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
{
#define CHECK(expr) \
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
#ifdef Py_GIL_DISABLED
# define CHECK_IF_GIL(expr) (void)(expr)
# define CHECK_IF_FT(expr) CHECK(expr)
#else
# define CHECK_IF_GIL(expr) CHECK(expr)
# define CHECK_IF_FT(expr) (void)(expr)
#endif
assert(op != NULL);
CHECK(PyUnicode_Check(op));
@@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
/* Check interning state */
#ifdef Py_DEBUG
// Note that we do not check `_Py_IsImmortal(op)`, since stable ABI
// extensions can make immortal strings mortal (but with a high enough
// refcount).
// The other way is extremely unlikely (worth a potential failed assertion
// in a debug build), so we do check `!_Py_IsImmortal(op)`.
// Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build
// since stable ABI extensions can make immortal strings mortal (but with a
// high enough refcount).
switch (PyUnicode_CHECK_INTERNED(op)) {
case SSTATE_NOT_INTERNED:
if (ascii->state.statically_allocated) {
@@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
// are static but use SSTATE_NOT_INTERNED
}
else {
CHECK(!_Py_IsImmortal(op));
CHECK_IF_GIL(!_Py_IsImmortal(op));
}
break;
case SSTATE_INTERNED_MORTAL:
CHECK(!ascii->state.statically_allocated);
CHECK(!_Py_IsImmortal(op));
CHECK_IF_GIL(!_Py_IsImmortal(op));
break;
case SSTATE_INTERNED_IMMORTAL:
CHECK(!ascii->state.statically_allocated);
CHECK_IF_FT(_Py_IsImmortal(op));
break;
case SSTATE_INTERNED_IMMORTAL_STATIC:
CHECK(ascii->state.statically_allocated);
CHECK_IF_FT(_Py_IsImmortal(op));
break;
default:
Py_UNREACHABLE();
@@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s)
FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
}
#ifdef Py_GIL_DISABLED
/* Report whether `s` can be immortalized in place without a data race.
 *
 * Immortalization rewrites ob_tid / ob_ref_local / ob_ref_shared (see
 * _Py_SetImmortalUntracked). That is only safe when no other thread can be
 * concurrently updating the refcount fields: the current thread owns `s`,
 * `s` is already immortal, or its shared refcount has been merged (the
 * owning thread has relinquished it). */
static bool
can_immortalize_safely(PyObject *s)
{
if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) {
return true;
}
/* Unowned: safe only if the refcount has been merged; otherwise the
 * owning thread may still perform local (non-atomic) refcount updates. */
Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared);
return _Py_REF_IS_MERGED(shared);
}
#endif
static /* non-null */ PyObject*
intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
bool immortalize)
@@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
// no, go on
break;
case SSTATE_INTERNED_MORTAL:
#ifndef Py_GIL_DISABLED
// yes but we might need to make it immortal
if (immortalize) {
immortalize_interned(s);
}
return s;
#else
// not fully interned yet; fall through to the locking path
break;
#endif
default:
// all done
return s;
@@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
Py_DECREF(r);
}
#endif
#ifdef Py_GIL_DISABLED
// Immortalization writes to the refcount fields non-atomically. That
// races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we
// don't own it (and its refcount hasn't been merged), intern a copy
// we own instead.
if (!can_immortalize_safely(s)) {
PyObject *copy = _PyUnicode_Copy(s);
if (copy == NULL) {
PyErr_Clear();
return s;
}
Py_DECREF(s);
s = copy;
}
#endif
FT_MUTEX_LOCK(INTERN_MUTEX);
PyObject *t;
{