mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
gh-140797: Forbid capturing groups in re.Scanner lexicon patterns (GH-140944)
This commit is contained in:
parent
a84181c31b
commit
fa9c3eefd4
3 changed files with 24 additions and 1 deletion
|
|
@@ -397,9 +397,12 @@ def __init__(self, lexicon, flags=0):
|
||||||
s = _parser.State()
|
s = _parser.State()
|
||||||
s.flags = flags
|
s.flags = flags
|
||||||
for phrase, action in lexicon:
|
for phrase, action in lexicon:
|
||||||
|
sub_pattern = _parser.parse(phrase, flags)
|
||||||
|
if sub_pattern.state.groups != 1:
|
||||||
|
raise ValueError("Cannot use capturing groups in re.Scanner")
|
||||||
gid = s.opengroup()
|
gid = s.opengroup()
|
||||||
p.append(_parser.SubPattern(s, [
|
p.append(_parser.SubPattern(s, [
|
||||||
(SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))),
|
(SUBPATTERN, (gid, 0, 0, sub_pattern)),
|
||||||
]))
|
]))
|
||||||
s.closegroup(gid, p[-1])
|
s.closegroup(gid, p[-1])
|
||||||
p = _parser.SubPattern(s, [(BRANCH, (None, p))])
|
p = _parser.SubPattern(s, [(BRANCH, (None, p))])
|
||||||
|
|
|
||||||
|
|
@@ -1639,6 +1639,24 @@ def s_int(scanner, token): return int(token)
|
||||||
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
|
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
|
||||||
'op+', 'bar'], ''))
|
'op+', 'bar'], ''))
|
||||||
|
|
||||||
|
def test_bug_gh140797(self):
|
||||||
|
# gh140797: Capturing groups are not allowed in re.Scanner
|
||||||
|
|
||||||
|
msg = r"Cannot use capturing groups in re\.Scanner"
|
||||||
|
# Capturing group throws an error
|
||||||
|
with self.assertRaisesRegex(ValueError, msg):
|
||||||
|
Scanner([("(a)b", None)])
|
||||||
|
|
||||||
|
# Named Group
|
||||||
|
with self.assertRaisesRegex(ValueError, msg):
|
||||||
|
Scanner([("(?P<name>a)", None)])
|
||||||
|
|
||||||
|
# Non-capturing groups should pass normally
|
||||||
|
s = Scanner([("(?:a)b", lambda scanner, token: token)])
|
||||||
|
result, rem = s.scan("ab")
|
||||||
|
self.assertEqual(result,['ab'])
|
||||||
|
self.assertEqual(rem,'')
|
||||||
|
|
||||||
def test_bug_448951(self):
|
def test_bug_448951(self):
|
||||||
# bug 448951 (similar to 429357, but with single char match)
|
# bug 448951 (similar to 429357, but with single char match)
|
||||||
# (Also test greedy matches.)
|
# (Also test greedy matches.)
|
||||||
|
|
|
||||||
|
|
@@ -0,0 +1,2 @@
|
||||||
|
The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could
|
||||||
|
previously lead to crashes with a segmentation fault. Use non-capturing groups ``(?:...)`` instead.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue