mirror of
https://github.com/python/cpython.git
synced 2025-12-07 13:50:06 +00:00
gh-116738: Fix thread-safety issue in re module for free threading (gh-141923)
Added atomic operations to `scanner_begin()` and `scanner_end()` to prevent race conditions on the `executing` flag in free-threaded builds. Also added tests for concurrent usage of the `re` module. Without the atomic operations, `test_scanner_concurrent_access()` triggers `assert(self->executing)` failures, or a thread sanitizer run emits errors.
This commit is contained in:
parent
9ac14288d7
commit
bc9e63dd9d
4 changed files with 79 additions and 4 deletions
|
|
@ -77,6 +77,10 @@ extern "C" {
|
|||
_Py_atomic_store_ushort_relaxed(&value, new_value)
|
||||
#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \
|
||||
_Py_atomic_load_ushort_relaxed(&value)
|
||||
#define FT_ATOMIC_LOAD_INT(value) \
|
||||
_Py_atomic_load_int(&value)
|
||||
#define FT_ATOMIC_STORE_INT(value, new_value) \
|
||||
_Py_atomic_store_int(&value, new_value)
|
||||
#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \
|
||||
_Py_atomic_store_int_relaxed(&value, new_value)
|
||||
#define FT_ATOMIC_LOAD_INT_RELAXED(value) \
|
||||
|
|
@ -144,6 +148,8 @@ extern "C" {
|
|||
#define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value
|
||||
#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value
|
||||
#define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value
|
||||
#define FT_ATOMIC_LOAD_INT(value) value
|
||||
#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value
|
||||
#define FT_ATOMIC_LOAD_INT_RELAXED(value) value
|
||||
#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value
|
||||
#define FT_ATOMIC_LOAD_UINT_RELAXED(value) value
|
||||
|
|
|
|||
62
Lib/test/test_free_threading/test_re.py
Normal file
62
Lib/test/test_free_threading/test_re.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import re
|
||||
import unittest
|
||||
|
||||
from test.support import threading_helper
|
||||
from test.support.threading_helper import run_concurrently
|
||||
|
||||
|
||||
NTHREADS = 10
|
||||
|
||||
|
||||
@threading_helper.requires_working_threading()
|
||||
class TestRe(unittest.TestCase):
|
||||
def test_pattern_sub(self):
|
||||
"""Pattern substitution should work across threads"""
|
||||
pattern = re.compile(r"\w+@\w+\.\w+")
|
||||
text = "e-mail: test@python.org or user@pycon.org. " * 5
|
||||
results = []
|
||||
|
||||
def worker():
|
||||
substituted = pattern.sub("(redacted)", text)
|
||||
results.append(substituted.count("(redacted)"))
|
||||
|
||||
run_concurrently(worker_func=worker, nthreads=NTHREADS)
|
||||
self.assertEqual(results, [2 * 5] * NTHREADS)
|
||||
|
||||
def test_pattern_search(self):
|
||||
"""Pattern search should work across threads."""
|
||||
emails = ["alice@python.org", "bob@pycon.org"] * 10
|
||||
pattern = re.compile(r"\w+@\w+\.\w+")
|
||||
results = []
|
||||
|
||||
def worker():
|
||||
matches = [pattern.search(e).group() for e in emails]
|
||||
results.append(len(matches))
|
||||
|
||||
run_concurrently(worker_func=worker, nthreads=NTHREADS)
|
||||
self.assertEqual(results, [2 * 10] * NTHREADS)
|
||||
|
||||
def test_scanner_concurrent_access(self):
|
||||
"""Shared scanner should reject concurrent access."""
|
||||
pattern = re.compile(r"\w+")
|
||||
scanner = pattern.scanner("word " * 10)
|
||||
|
||||
def worker():
|
||||
for _ in range(100):
|
||||
try:
|
||||
scanner.search()
|
||||
except ValueError as e:
|
||||
if "already executing" in str(e):
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
|
||||
run_concurrently(worker_func=worker, nthreads=NTHREADS)
|
||||
# This test has no assertions. Its purpose is to catch crashes and
|
||||
# enable thread sanitizer to detect race conditions. While "already
|
||||
# executing" errors are very likely, they're not guaranteed due to
|
||||
# non-deterministic thread scheduling, so we can't assert errors > 0.
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
Fix thread safety issue with :mod:`re` scanner objects in free-threaded
|
||||
builds.
|
||||
|
|
@ -2841,20 +2841,25 @@ scanner_dealloc(PyObject *self)
|
|||
static int
|
||||
scanner_begin(ScannerObject* self)
|
||||
{
|
||||
if (self->executing) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
|
||||
#else
|
||||
int was_executing = self->executing;
|
||||
self->executing = 1;
|
||||
#endif
|
||||
if (was_executing) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"regular expression scanner already executing");
|
||||
return 0;
|
||||
}
|
||||
self->executing = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
scanner_end(ScannerObject* self)
|
||||
{
|
||||
assert(self->executing);
|
||||
self->executing = 0;
|
||||
assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
|
||||
FT_ATOMIC_STORE_INT(self->executing, 0);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue