mirror of
https://github.com/python/cpython.git
synced 2025-11-01 14:11:41 +00:00
Minor code cleanup and improvements in the re module.
This commit is contained in:
parent
9a64ccb997
commit
ab14088141
4 changed files with 13 additions and 17 deletions
|
|
@@ -363,7 +363,7 @@ def scan(self, string):
|
||||||
append = result.append
|
append = result.append
|
||||||
match = self.scanner.scanner(string).match
|
match = self.scanner.scanner(string).match
|
||||||
i = 0
|
i = 0
|
||||||
while 1:
|
while True:
|
||||||
m = match()
|
m = match()
|
||||||
if not m:
|
if not m:
|
||||||
break
|
break
|
||||||
|
|
|
||||||
|
|
@@ -16,11 +16,6 @@
|
||||||
|
|
||||||
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
|
||||||
|
|
||||||
if _sre.CODESIZE == 2:
|
|
||||||
MAXCODE = 65535
|
|
||||||
else:
|
|
||||||
MAXCODE = 0xFFFFFFFF
|
|
||||||
|
|
||||||
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
|
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
|
||||||
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
|
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
|
||||||
_SUCCESS_CODES = {SUCCESS, FAILURE}
|
_SUCCESS_CODES = {SUCCESS, FAILURE}
|
||||||
|
|
@@ -191,7 +186,7 @@ def fixup(literal, flags=flags):
|
||||||
emit(JUMP)
|
emit(JUMP)
|
||||||
tailappend(_len(code)); emit(0)
|
tailappend(_len(code)); emit(0)
|
||||||
code[skip] = _len(code) - skip
|
code[skip] = _len(code) - skip
|
||||||
emit(0) # end of branch
|
emit(FAILURE) # end of branch
|
||||||
for tail in tail:
|
for tail in tail:
|
||||||
code[tail] = _len(code) - tail
|
code[tail] = _len(code) - tail
|
||||||
elif op is CATEGORY:
|
elif op is CATEGORY:
|
||||||
|
|
@@ -374,6 +369,7 @@ def _optimize_charset(charset, fixup, fixes):
|
||||||
return out
|
return out
|
||||||
|
|
||||||
_CODEBITS = _sre.CODESIZE * 8
|
_CODEBITS = _sre.CODESIZE * 8
|
||||||
|
MAXCODE = (1 << _CODEBITS) - 1
|
||||||
_BITS_TRANS = b'0' + b'1' * 255
|
_BITS_TRANS = b'0' + b'1' * 255
|
||||||
def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
|
def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
|
||||||
s = bits.translate(_BITS_TRANS)[::-1]
|
s = bits.translate(_BITS_TRANS)[::-1]
|
||||||
|
|
@@ -477,9 +473,9 @@ def _compile_info(code, pattern, flags):
|
||||||
elif op is IN:
|
elif op is IN:
|
||||||
charset = av
|
charset = av
|
||||||
## if prefix:
|
## if prefix:
|
||||||
## print "*** PREFIX", prefix, prefix_skip
|
## print("*** PREFIX", prefix, prefix_skip)
|
||||||
## if charset:
|
## if charset:
|
||||||
## print "*** CHARSET", charset
|
## print("*** CHARSET", charset)
|
||||||
# add an info block
|
# add an info block
|
||||||
emit = code.append
|
emit = code.append
|
||||||
emit(INFO)
|
emit(INFO)
|
||||||
|
|
@@ -489,9 +485,9 @@ def _compile_info(code, pattern, flags):
|
||||||
if prefix:
|
if prefix:
|
||||||
mask = SRE_INFO_PREFIX
|
mask = SRE_INFO_PREFIX
|
||||||
if len(prefix) == prefix_skip == len(pattern.data):
|
if len(prefix) == prefix_skip == len(pattern.data):
|
||||||
mask = mask + SRE_INFO_LITERAL
|
mask = mask | SRE_INFO_LITERAL
|
||||||
elif charset:
|
elif charset:
|
||||||
mask = mask + SRE_INFO_CHARSET
|
mask = mask | SRE_INFO_CHARSET
|
||||||
emit(mask)
|
emit(mask)
|
||||||
# pattern length
|
# pattern length
|
||||||
if lo < MAXCODE:
|
if lo < MAXCODE:
|
||||||
|
|
|
||||||
|
|
@@ -103,18 +103,18 @@ def dump(self, level=0):
|
||||||
seqtypes = (tuple, list)
|
seqtypes = (tuple, list)
|
||||||
for op, av in self.data:
|
for op, av in self.data:
|
||||||
print(level*" " + str(op), end='')
|
print(level*" " + str(op), end='')
|
||||||
if op == IN:
|
if op is IN:
|
||||||
# member sublanguage
|
# member sublanguage
|
||||||
print()
|
print()
|
||||||
for op, a in av:
|
for op, a in av:
|
||||||
print((level+1)*" " + str(op), a)
|
print((level+1)*" " + str(op), a)
|
||||||
elif op == BRANCH:
|
elif op is BRANCH:
|
||||||
print()
|
print()
|
||||||
for i, a in enumerate(av[1]):
|
for i, a in enumerate(av[1]):
|
||||||
if i:
|
if i:
|
||||||
print(level*" " + "OR")
|
print(level*" " + "OR")
|
||||||
a.dump(level+1)
|
a.dump(level+1)
|
||||||
elif op == GROUPREF_EXISTS:
|
elif op is GROUPREF_EXISTS:
|
||||||
condgroup, item_yes, item_no = av
|
condgroup, item_yes, item_no = av
|
||||||
print('', condgroup)
|
print('', condgroup)
|
||||||
item_yes.dump(level+1)
|
item_yes.dump(level+1)
|
||||||
|
|
@@ -607,7 +607,7 @@ def _parse(source, state):
|
||||||
item = subpattern[-1:]
|
item = subpattern[-1:]
|
||||||
else:
|
else:
|
||||||
item = None
|
item = None
|
||||||
if not item or (_len(item) == 1 and item[0][0] == AT):
|
if not item or (_len(item) == 1 and item[0][0] is AT):
|
||||||
raise source.error("nothing to repeat",
|
raise source.error("nothing to repeat",
|
||||||
source.tell() - here + len(this))
|
source.tell() - here + len(this))
|
||||||
if item[0][0] in _REPEATCODES:
|
if item[0][0] in _REPEATCODES:
|
||||||
|
|
|
||||||
|
|
@@ -1101,8 +1101,8 @@ def test_empty_array(self):
|
||||||
|
|
||||||
def test_inline_flags(self):
|
def test_inline_flags(self):
|
||||||
# Bug #1700
|
# Bug #1700
|
||||||
upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
|
upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
|
||||||
lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
|
lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
|
||||||
|
|
||||||
p = re.compile(upper_char, re.I | re.U)
|
p = re.compile(upper_char, re.I | re.U)
|
||||||
q = p.match(lower_char)
|
q = p.match(lower_char)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue