mirror of
https://github.com/python/cpython.git
synced 2025-11-01 14:11:41 +00:00
fixed character set description in docstring (SRE uses Python
strings, not C strings)

removed USE_PYTHON defines, and related sre.py helpers

skip calling the subx helper if the template is callable.
interestingly enough, this means that

    def callback(m):
        return literal
    result = pattern.sub(callback, string)

is much faster than

    result = pattern.sub(literal, string)
This commit is contained in:
parent
0402dd18cb
commit
dac58492aa
2 changed files with 55 additions and 175 deletions
93
Lib/sre.py
93
Lib/sre.py
|
|
@ -17,15 +17,13 @@
|
|||
r"""Support for regular expressions (RE).
|
||||
|
||||
This module provides regular expression matching operations similar to
|
||||
those found in Perl. It's 8-bit clean: the strings being processed may
|
||||
contain both null bytes and characters whose high bit is set. Regular
|
||||
expression pattern strings may not contain null bytes, but can specify
|
||||
the null byte using the \\number notation. Characters with the high
|
||||
bit set may be included.
|
||||
those found in Perl. It supports both 8-bit and Unicode strings; both
|
||||
the pattern and the strings being processed can contain null bytes and
|
||||
characters outside the US ASCII range.
|
||||
|
||||
Regular expressions can contain both special and ordinary
|
||||
characters. Most ordinary characters, like "A", "a", or "0", are the
|
||||
simplest regular expressions; they simply match themselves. You can
|
||||
Regular expressions can contain both special and ordinary characters.
|
||||
Most ordinary characters, like "A", "a", or "0", are the simplest
|
||||
regular expressions; they simply match themselves. You can
|
||||
concatenate ordinary characters, so last matches the string 'last'.
|
||||
|
||||
The special characters are:
|
||||
|
|
@ -45,7 +43,7 @@
|
|||
"|" A|B, creates an RE that will match either A or B.
|
||||
(...) Matches the RE inside the parentheses.
|
||||
The contents can be retrieved or matched later in the string.
|
||||
(?iLmsx) Set the I, L, M, S, or X flag for the RE (see below).
|
||||
(?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
|
||||
(?:...) Non-grouping version of regular parentheses.
|
||||
(?P<name>...) The substring matched by the group is accessible by name.
|
||||
(?P=name) Matches the text matched earlier by the group named name.
|
||||
|
|
@ -54,7 +52,7 @@
|
|||
(?!...) Matches if ... doesn't match next.
|
||||
|
||||
The special sequences consist of "\\" and a character from the list
|
||||
below. If the ordinary character is not on the list, then the
|
||||
below. If the ordinary character is not on the list, then the
|
||||
resulting RE will match the second character.
|
||||
\number Matches the contents of the group of the same number.
|
||||
\A Matches only at the start of the string.
|
||||
|
|
@ -246,76 +244,13 @@ def _expand(pattern, match, template):
|
|||
|
||||
def _subx(pattern, template):
|
||||
# internal: pattern.sub/subn implementation helper
|
||||
if callable(template):
|
||||
filter = template
|
||||
else:
|
||||
template = _compile_repl(template, pattern)
|
||||
if not template[0] and len(template[1]) == 1:
|
||||
# literal replacement
|
||||
filter = template[1][0]
|
||||
else:
|
||||
def filter(match, template=template):
|
||||
return sre_parse.expand_template(template, match)
|
||||
return filter
|
||||
|
||||
def _sub(pattern, template, text, count=0):
|
||||
# internal: pattern.sub implementation hook
|
||||
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
|
||||
return _subn(pattern, template, text, count)[0]
|
||||
|
||||
def _subn(pattern, template, text, count=0):
|
||||
# internal: pattern.subn implementation hook
|
||||
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
|
||||
filter = _subx(pattern, template)
|
||||
if not callable(filter):
|
||||
template = _compile_repl(template, pattern)
|
||||
if not template[0] and len(template[1]) == 1:
|
||||
# literal replacement
|
||||
def filter(match, literal=filter):
|
||||
return literal
|
||||
n = i = 0
|
||||
s = []
|
||||
append = s.append
|
||||
c = pattern.scanner(text)
|
||||
while not count or n < count:
|
||||
m = c.search()
|
||||
if not m:
|
||||
break
|
||||
b, e = m.span()
|
||||
if i < b:
|
||||
append(text[i:b])
|
||||
elif i == b == e and n:
|
||||
append(text[i:b])
|
||||
continue # ignore empty match at previous position
|
||||
append(filter(m))
|
||||
i = e
|
||||
n = n + 1
|
||||
append(text[i:])
|
||||
return _join(s, text[:0]), n
|
||||
|
||||
def _split(pattern, text, maxsplit=0):
|
||||
# internal: pattern.split implementation hook
|
||||
# FIXME: not used in SRE 2.2.1 and later; will be removed soon
|
||||
n = i = 0
|
||||
s = []
|
||||
append = s.append
|
||||
extend = s.extend
|
||||
c = pattern.scanner(text)
|
||||
g = pattern.groups
|
||||
while not maxsplit or n < maxsplit:
|
||||
m = c.search()
|
||||
if not m:
|
||||
break
|
||||
b, e = m.span()
|
||||
if b == e:
|
||||
if i >= len(text):
|
||||
break
|
||||
continue
|
||||
append(text[i:b])
|
||||
if g and b != e:
|
||||
extend(list(m.groups()))
|
||||
i = e
|
||||
n = n + 1
|
||||
append(text[i:])
|
||||
return s
|
||||
return template[1][0]
|
||||
def filter(match, template=template):
|
||||
return sre_parse.expand_template(template, match)
|
||||
return filter
|
||||
|
||||
# register myself for pickling
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue