gh-116738: Fix thread-safety issue in re module for free threading (gh-141923)

Added atomic operations to `scanner_begin()` and `scanner_end()` to prevent
race conditions on the `executing` flag in free-threaded builds. Also added
tests for concurrent usage of the `re` module.

Without the atomic operations, `test_scanner_concurrent_access()` triggers
`assert(self->executing)` failures, or a thread sanitizer run emits errors.
This commit is contained in:
Alper 2025-11-26 12:40:45 -08:00 committed by GitHub
parent 9ac14288d7
commit bc9e63dd9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 79 additions and 4 deletions

View file

@ -0,0 +1,62 @@
import re
import unittest
from test.support import threading_helper
from test.support.threading_helper import run_concurrently
NTHREADS = 10
@threading_helper.requires_working_threading()
class TestRe(unittest.TestCase):
def test_pattern_sub(self):
"""Pattern substitution should work across threads"""
pattern = re.compile(r"\w+@\w+\.\w+")
text = "e-mail: test@python.org or user@pycon.org. " * 5
results = []
def worker():
substituted = pattern.sub("(redacted)", text)
results.append(substituted.count("(redacted)"))
run_concurrently(worker_func=worker, nthreads=NTHREADS)
self.assertEqual(results, [2 * 5] * NTHREADS)
def test_pattern_search(self):
"""Pattern search should work across threads."""
emails = ["alice@python.org", "bob@pycon.org"] * 10
pattern = re.compile(r"\w+@\w+\.\w+")
results = []
def worker():
matches = [pattern.search(e).group() for e in emails]
results.append(len(matches))
run_concurrently(worker_func=worker, nthreads=NTHREADS)
self.assertEqual(results, [2 * 10] * NTHREADS)
def test_scanner_concurrent_access(self):
"""Shared scanner should reject concurrent access."""
pattern = re.compile(r"\w+")
scanner = pattern.scanner("word " * 10)
def worker():
for _ in range(100):
try:
scanner.search()
except ValueError as e:
if "already executing" in str(e):
pass
else:
raise
run_concurrently(worker_func=worker, nthreads=NTHREADS)
# This test has no assertions. Its purpose is to catch crashes and
# enable thread sanitizer to detect race conditions. While "already
# executing" errors are very likely, they're not guaranteed due to
# non-deterministic thread scheduling, so we can't assert errors > 0.
if __name__ == "__main__":
unittest.main()