Issue #25953: re.sub() now raises an error for invalid numerical group
reference in replacement template even if the pattern is not found in
the string.  Error message for invalid group reference now includes the
group index and the position of the reference.
Based on patch by SilentGhost.
This commit is contained in:
Serhiy Storchaka 2016-10-23 12:12:05 +03:00
commit ee8337a99d
3 changed files with 38 additions and 29 deletions

View file

@ -395,7 +395,7 @@ def _escape(source, escape, state):
len(escape))
state.checklookbehindgroup(group, source)
return GROUPREF, group
raise source.error("invalid group reference", len(escape))
raise source.error("invalid group reference %d" % group, len(escape) - 1)
if len(escape) == 2:
if c in ASCIILETTERS:
raise source.error("bad escape %s" % escape, len(escape))
@ -725,8 +725,8 @@ def _parse(source, state, verbose):
raise source.error("bad group number",
len(condname) + 1)
if condgroup >= MAXGROUPS:
raise source.error("invalid group reference",
len(condname) + 1)
msg = "invalid group reference %d" % condgroup
raise source.error(msg, len(condname) + 1)
state.checklookbehindgroup(condgroup, source)
elif char in FLAGS or char == "-":
# flags
@ -883,7 +883,9 @@ def parse_template(source, pattern):
literals = []
literal = []
lappend = literal.append
def addgroup(index):
def addgroup(index, pos):
if index > pattern.groups:
raise s.error("invalid group reference %d" % index, pos)
if literal:
literals.append(''.join(literal))
del literal[:]
@ -916,9 +918,9 @@ def addgroup(index):
raise s.error("bad character in group name %r" % name,
len(name) + 1) from None
if index >= MAXGROUPS:
raise s.error("invalid group reference",
raise s.error("invalid group reference %d" % index,
len(name) + 1)
addgroup(index)
addgroup(index, len(name) + 1)
elif c == "0":
if s.next in OCTDIGITS:
this += sget()
@ -939,7 +941,7 @@ def addgroup(index):
'range 0-0o377' % this, len(this))
lappend(chr(c))
if not isoctal:
addgroup(int(this[1:]))
addgroup(int(this[1:]), len(this) - 1)
else:
try:
this = chr(ESCAPES[this][1])
@ -966,5 +968,5 @@ def expand_template(template, match):
for index, group in groups:
literals[index] = g(group) or empty
except IndexError:
raise error("invalid group reference")
raise error("invalid group reference %d" % index)
return empty.join(literals)

View file

@ -5,7 +5,6 @@
import re
from re import Scanner
import sre_compile
import sre_constants
import sys
import string
import traceback
@ -186,18 +185,19 @@ def test_sub_template_numeric_escape(self):
r'octal escape value \777 outside of '
r'range 0-0o377', 0)
self.checkTemplateError('x', r'\1', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\8', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\9', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\11', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\18', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\90', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\99', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\118', 'x', 'invalid group reference') # r'\11' + '8'
self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference')
self.checkTemplateError('x', r'\181', 'x', 'invalid group reference') # r'\18' + '1'
self.checkTemplateError('x', r'\800', 'x', 'invalid group reference') # r'\80' + '0'
self.checkTemplateError('x', r'\1', 'x', 'invalid group reference 1', 1)
self.checkTemplateError('x', r'\8', 'x', 'invalid group reference 8', 1)
self.checkTemplateError('x', r'\9', 'x', 'invalid group reference 9', 1)
self.checkTemplateError('x', r'\11', 'x', 'invalid group reference 11', 1)
self.checkTemplateError('x', r'\18', 'x', 'invalid group reference 18', 1)
self.checkTemplateError('x', r'\1a', 'x', 'invalid group reference 1', 1)
self.checkTemplateError('x', r'\90', 'x', 'invalid group reference 90', 1)
self.checkTemplateError('x', r'\99', 'x', 'invalid group reference 99', 1)
self.checkTemplateError('x', r'\118', 'x', 'invalid group reference 11', 1)
self.checkTemplateError('x', r'\11a', 'x', 'invalid group reference 11', 1)
self.checkTemplateError('x', r'\181', 'x', 'invalid group reference 18', 1)
self.checkTemplateError('x', r'\800', 'x', 'invalid group reference 80', 1)
self.checkTemplateError('x', r'\8', '', 'invalid group reference 8', 1)
# in python2.3 (etc), these loop endlessly in sre_parser.py
self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
@ -271,9 +271,9 @@ def test_symbolic_refs(self):
self.checkTemplateError('(?P<a>x)', r'\g<1a1>', 'xx',
"bad character in group name '1a1'", 3)
self.checkTemplateError('(?P<a>x)', r'\g<2>', 'xx',
'invalid group reference')
'invalid group reference 2', 3)
self.checkTemplateError('(?P<a>x)', r'\2', 'xx',
'invalid group reference')
'invalid group reference 2', 1)
with self.assertRaisesRegex(IndexError, "unknown group name 'ab'"):
re.sub('(?P<a>x)', r'\g<ab>', 'xx')
self.assertEqual(re.sub('(?P<a>x)|(?P<b>y)', r'\g<b>', 'xx'), '')
@ -558,10 +558,11 @@ def test_re_groupref_exists(self):
'two branches', 10)
def test_re_groupref_overflow(self):
self.checkTemplateError('()', r'\g<%s>' % sre_constants.MAXGROUPS, 'xx',
'invalid group reference', 3)
self.checkPatternError(r'(?P<a>)(?(%d))' % sre_constants.MAXGROUPS,
'invalid group reference', 10)
from sre_constants import MAXGROUPS
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
'invalid group reference %d' % MAXGROUPS, 3)
self.checkPatternError(r'(?P<a>)(?(%d))' % MAXGROUPS,
'invalid group reference %d' % MAXGROUPS, 10)
def test_re_groupref(self):
self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
@ -1007,7 +1008,7 @@ def test_sre_character_literals(self):
self.checkPatternError(r"\567",
r'octal escape value \567 outside of '
r'range 0-0o377', 0)
self.checkPatternError(r"\911", 'invalid group reference', 0)
self.checkPatternError(r"\911", 'invalid group reference 91', 1)
self.checkPatternError(r"\x1", r'incomplete escape \x1', 0)
self.checkPatternError(r"\x1z", r'incomplete escape \x1', 0)
self.checkPatternError(r"\u123", r'incomplete escape \u123', 0)
@ -1061,7 +1062,7 @@ def test_sre_byte_literals(self):
self.checkPatternError(br"\567",
r'octal escape value \567 outside of '
r'range 0-0o377', 0)
self.checkPatternError(br"\911", 'invalid group reference', 0)
self.checkPatternError(br"\911", 'invalid group reference 91', 1)
self.checkPatternError(br"\x1", r'incomplete escape \x1', 0)
self.checkPatternError(br"\x1z", r'incomplete escape \x1', 0)

View file

@ -16,6 +16,12 @@ Core and Builtins
- Issue #23782: Fixed possible memory leak in _PyTraceback_Add() and exception
loss in PyTraceBack_Here().
- Issue #25953: re.sub() now raises an error for invalid numerical group
reference in replacement template even if the pattern is not found in
the string. Error message for invalid group reference now includes the
group index and the position of the reference.
Based on patch by SilentGhost.
- Issue #28183: Optimize and cleanup dict iteration.
- Issue #26081: Added C implementation of asyncio.Future.