| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  | # | 
					
						
							|  |  |  | # Secret Labs' Regular Expression Engine | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # convert re-style regular expression to sre pattern | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # See the __init__.py file for information on usage and redistribution. | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """Internal support module for sre""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # XXX: show string offset and offending character for all errors | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from ._constants import * | 
					
						
							|  |  |  | 
 | 
					
						
# Character tables.  NOTE(review): SPECIAL_CHARS, REPEAT_CHARS and WHITESPACE
# are not referenced in the visible part of this file; judging by their
# names they list regex metacharacters, repetition operators and skippable
# whitespace -- confirm against the parser code further down.
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"

# Single-character sets used for membership tests while reading escapes.
DIGITS = frozenset("0123456789")

OCTDIGITS = frozenset("01234567")
HEXDIGITS = frozenset("0123456789abcdefABCDEF")
ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

WHITESPACE = frozenset(" \t\n\r\v\f")

# Opcode groups consulted by SubPattern.getwidth():
#   _REPEATCODES -- the argument is indexed as (min, max, subpattern)
#   _UNITCODES   -- each occurrence adds exactly 1 to both width bounds
_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT})
_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})

# Two-character escape text -> (LITERAL, code point).
# Note that r"\b" is also a key of CATEGORIES: _class_escape() looks in
# ESCAPES first (so inside a class it is a backspace literal), while
# _escape() looks in CATEGORIES first (so elsewhere it is a boundary).
ESCAPES = {
    r"\a": (LITERAL, ord("\a")),
    r"\b": (LITERAL, ord("\b")),
    r"\f": (LITERAL, ord("\f")),
    r"\n": (LITERAL, ord("\n")),
    r"\r": (LITERAL, ord("\r")),
    r"\t": (LITERAL, ord("\t")),
    r"\v": (LITERAL, ord("\v")),
    r"\\": (LITERAL, ord("\\"))
}

# Two-character escape text -> parsed item, either an (AT, position) pair
# or an (IN, [(CATEGORY, kind)]) pair.  _class_escape() only accepts the
# IN entries from this table.
CATEGORIES = {
    r"\A": (AT, AT_BEGINNING_STRING), # start of string
    r"\b": (AT, AT_BOUNDARY),
    r"\B": (AT, AT_NON_BOUNDARY),
    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    r"\Z": (AT, AT_END_STRING), # end of string
}

# Single-letter flag name -> SRE_FLAG_* bit.
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "a": SRE_FLAG_ASCII,
    "u": SRE_FLAG_UNICODE,
}

# Bit masks built from the flag constants.  NOTE(review): their consumers
# are outside the visible part of this file.
TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE
GLOBAL_FLAGS = SRE_FLAG_DEBUG
					
						
							| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  | 
 | 
					
						
							|  |  |  | class Verbose(Exception): | 
					
						
							|  |  |  |     pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class State: | 
					
						
							|  |  |  |     # keeps track of state for parsing | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.flags = 0 | 
					
						
							|  |  |  |         self.groupdict = {} | 
					
						
							|  |  |  |         self.groupwidths = [None]  # group 0 | 
					
						
							|  |  |  |         self.lookbehindgroups = None | 
					
						
							|  |  |  |     @property | 
					
						
							|  |  |  |     def groups(self): | 
					
						
							|  |  |  |         return len(self.groupwidths) | 
					
						
							|  |  |  |     def opengroup(self, name=None): | 
					
						
							|  |  |  |         gid = self.groups | 
					
						
							|  |  |  |         self.groupwidths.append(None) | 
					
						
							|  |  |  |         if self.groups > MAXGROUPS: | 
					
						
							|  |  |  |             raise error("too many groups") | 
					
						
							|  |  |  |         if name is not None: | 
					
						
							|  |  |  |             ogid = self.groupdict.get(name, None) | 
					
						
							|  |  |  |             if ogid is not None: | 
					
						
							|  |  |  |                 raise error("redefinition of group name %r as group %d; " | 
					
						
							|  |  |  |                             "was group %d" % (name, gid,  ogid)) | 
					
						
							|  |  |  |             self.groupdict[name] = gid | 
					
						
							|  |  |  |         return gid | 
					
						
							|  |  |  |     def closegroup(self, gid, p): | 
					
						
							|  |  |  |         self.groupwidths[gid] = p.getwidth() | 
					
						
							|  |  |  |     def checkgroup(self, gid): | 
					
						
							|  |  |  |         return gid < self.groups and self.groupwidths[gid] is not None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def checklookbehindgroup(self, gid, source): | 
					
						
							|  |  |  |         if self.lookbehindgroups is not None: | 
					
						
							|  |  |  |             if not self.checkgroup(gid): | 
					
						
							|  |  |  |                 raise source.error('cannot refer to an open group') | 
					
						
							|  |  |  |             if gid >= self.lookbehindgroups: | 
					
						
							|  |  |  |                 raise source.error('cannot refer to group defined in the same ' | 
					
						
							|  |  |  |                                    'lookbehind subpattern') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SubPattern: | 
					
						
							|  |  |  |     # a subpattern, in intermediate form | 
					
						
							|  |  |  |     def __init__(self, state, data=None): | 
					
						
							|  |  |  |         self.state = state | 
					
						
							|  |  |  |         if data is None: | 
					
						
							|  |  |  |             data = [] | 
					
						
							|  |  |  |         self.data = data | 
					
						
							|  |  |  |         self.width = None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def dump(self, level=0): | 
					
						
							|  |  |  |         nl = True | 
					
						
							|  |  |  |         seqtypes = (tuple, list) | 
					
						
							|  |  |  |         for op, av in self.data: | 
					
						
							|  |  |  |             print(level*"  " + str(op), end='') | 
					
						
							|  |  |  |             if op is IN: | 
					
						
							|  |  |  |                 # member sublanguage | 
					
						
							|  |  |  |                 print() | 
					
						
							|  |  |  |                 for op, a in av: | 
					
						
							|  |  |  |                     print((level+1)*"  " + str(op), a) | 
					
						
							|  |  |  |             elif op is BRANCH: | 
					
						
							|  |  |  |                 print() | 
					
						
							|  |  |  |                 for i, a in enumerate(av[1]): | 
					
						
							|  |  |  |                     if i: | 
					
						
							|  |  |  |                         print(level*"  " + "OR") | 
					
						
							|  |  |  |                     a.dump(level+1) | 
					
						
							|  |  |  |             elif op is GROUPREF_EXISTS: | 
					
						
							|  |  |  |                 condgroup, item_yes, item_no = av | 
					
						
							|  |  |  |                 print('', condgroup) | 
					
						
							|  |  |  |                 item_yes.dump(level+1) | 
					
						
							|  |  |  |                 if item_no: | 
					
						
							|  |  |  |                     print(level*"  " + "ELSE") | 
					
						
							|  |  |  |                     item_no.dump(level+1) | 
					
						
							|  |  |  |             elif isinstance(av, seqtypes): | 
					
						
							|  |  |  |                 nl = False | 
					
						
							|  |  |  |                 for a in av: | 
					
						
							|  |  |  |                     if isinstance(a, SubPattern): | 
					
						
							|  |  |  |                         if not nl: | 
					
						
							|  |  |  |                             print() | 
					
						
							|  |  |  |                         a.dump(level+1) | 
					
						
							|  |  |  |                         nl = True | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         if not nl: | 
					
						
							|  |  |  |                             print(' ', end='') | 
					
						
							|  |  |  |                         print(a, end='') | 
					
						
							|  |  |  |                         nl = False | 
					
						
							|  |  |  |                 if not nl: | 
					
						
							|  |  |  |                     print() | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 print('', av) | 
					
						
							|  |  |  |     def __repr__(self): | 
					
						
							|  |  |  |         return repr(self.data) | 
					
						
							|  |  |  |     def __len__(self): | 
					
						
							|  |  |  |         return len(self.data) | 
					
						
							|  |  |  |     def __delitem__(self, index): | 
					
						
							|  |  |  |         del self.data[index] | 
					
						
							|  |  |  |     def __getitem__(self, index): | 
					
						
							|  |  |  |         if isinstance(index, slice): | 
					
						
							|  |  |  |             return SubPattern(self.state, self.data[index]) | 
					
						
							|  |  |  |         return self.data[index] | 
					
						
							|  |  |  |     def __setitem__(self, index, code): | 
					
						
							|  |  |  |         self.data[index] = code | 
					
						
							|  |  |  |     def insert(self, index, code): | 
					
						
							|  |  |  |         self.data.insert(index, code) | 
					
						
							|  |  |  |     def append(self, code): | 
					
						
							|  |  |  |         self.data.append(code) | 
					
						
							|  |  |  |     def getwidth(self): | 
					
						
							|  |  |  |         # determine the width (min, max) for this subpattern | 
					
						
							|  |  |  |         if self.width is not None: | 
					
						
							|  |  |  |             return self.width | 
					
						
							|  |  |  |         lo = hi = 0 | 
					
						
							|  |  |  |         for op, av in self.data: | 
					
						
							|  |  |  |             if op is BRANCH: | 
					
						
							|  |  |  |                 i = MAXREPEAT - 1 | 
					
						
							|  |  |  |                 j = 0 | 
					
						
							|  |  |  |                 for av in av[1]: | 
					
						
							|  |  |  |                     l, h = av.getwidth() | 
					
						
							|  |  |  |                     i = min(i, l) | 
					
						
							|  |  |  |                     j = max(j, h) | 
					
						
							|  |  |  |                 lo = lo + i | 
					
						
							|  |  |  |                 hi = hi + j | 
					
						
							|  |  |  |             elif op is CALL: | 
					
						
							|  |  |  |                 i, j = av.getwidth() | 
					
						
							|  |  |  |                 lo = lo + i | 
					
						
							|  |  |  |                 hi = hi + j | 
					
						
							|  |  |  |             elif op is ATOMIC_GROUP: | 
					
						
							|  |  |  |                 i, j = av.getwidth() | 
					
						
							|  |  |  |                 lo = lo + i | 
					
						
							|  |  |  |                 hi = hi + j | 
					
						
							|  |  |  |             elif op is SUBPATTERN: | 
					
						
							|  |  |  |                 i, j = av[-1].getwidth() | 
					
						
							|  |  |  |                 lo = lo + i | 
					
						
							|  |  |  |                 hi = hi + j | 
					
						
							|  |  |  |             elif op in _REPEATCODES: | 
					
						
							|  |  |  |                 i, j = av[2].getwidth() | 
					
						
							|  |  |  |                 lo = lo + i * av[0] | 
					
						
							|  |  |  |                 hi = hi + j * av[1] | 
					
						
							|  |  |  |             elif op in _UNITCODES: | 
					
						
							|  |  |  |                 lo = lo + 1 | 
					
						
							|  |  |  |                 hi = hi + 1 | 
					
						
							|  |  |  |             elif op is GROUPREF: | 
					
						
							|  |  |  |                 i, j = self.state.groupwidths[av] | 
					
						
							|  |  |  |                 lo = lo + i | 
					
						
							|  |  |  |                 hi = hi + j | 
					
						
							|  |  |  |             elif op is GROUPREF_EXISTS: | 
					
						
							|  |  |  |                 i, j = av[1].getwidth() | 
					
						
							|  |  |  |                 if av[2] is not None: | 
					
						
							|  |  |  |                     l, h = av[2].getwidth() | 
					
						
							|  |  |  |                     i = min(i, l) | 
					
						
							|  |  |  |                     j = max(j, h) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     i = 0 | 
					
						
							|  |  |  |                 lo = lo + i | 
					
						
							|  |  |  |                 hi = hi + j | 
					
						
							|  |  |  |             elif op is SUCCESS: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) | 
					
						
							|  |  |  |         return self.width | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Tokenizer: | 
					
						
							|  |  |  |     def __init__(self, string): | 
					
						
							|  |  |  |         self.istext = isinstance(string, str) | 
					
						
							|  |  |  |         self.string = string | 
					
						
							|  |  |  |         if not self.istext: | 
					
						
							|  |  |  |             string = str(string, 'latin1') | 
					
						
							|  |  |  |         self.decoded_string = string | 
					
						
							|  |  |  |         self.index = 0 | 
					
						
							|  |  |  |         self.next = None | 
					
						
							|  |  |  |         self.__next() | 
					
						
							|  |  |  |     def __next(self): | 
					
						
							|  |  |  |         index = self.index | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             char = self.decoded_string[index] | 
					
						
							|  |  |  |         except IndexError: | 
					
						
							|  |  |  |             self.next = None | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  |         if char == "\\": | 
					
						
							|  |  |  |             index += 1 | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 char += self.decoded_string[index] | 
					
						
							|  |  |  |             except IndexError: | 
					
						
							|  |  |  |                 raise error("bad escape (end of pattern)", | 
					
						
							|  |  |  |                             self.string, len(self.string) - 1) from None | 
					
						
							|  |  |  |         self.index = index + 1 | 
					
						
							|  |  |  |         self.next = char | 
					
						
							|  |  |  |     def match(self, char): | 
					
						
							|  |  |  |         if char == self.next: | 
					
						
							|  |  |  |             self.__next() | 
					
						
							|  |  |  |             return True | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     def get(self): | 
					
						
							|  |  |  |         this = self.next | 
					
						
							|  |  |  |         self.__next() | 
					
						
							|  |  |  |         return this | 
					
						
							|  |  |  |     def getwhile(self, n, charset): | 
					
						
							|  |  |  |         result = '' | 
					
						
							|  |  |  |         for _ in range(n): | 
					
						
							|  |  |  |             c = self.next | 
					
						
							|  |  |  |             if c not in charset: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             result += c | 
					
						
							|  |  |  |             self.__next() | 
					
						
							|  |  |  |         return result | 
					
						
							|  |  |  |     def getuntil(self, terminator, name): | 
					
						
							|  |  |  |         result = '' | 
					
						
							|  |  |  |         while True: | 
					
						
							|  |  |  |             c = self.next | 
					
						
							|  |  |  |             self.__next() | 
					
						
							|  |  |  |             if c is None: | 
					
						
							|  |  |  |                 if not result: | 
					
						
							|  |  |  |                     raise self.error("missing " + name) | 
					
						
							|  |  |  |                 raise self.error("missing %s, unterminated name" % terminator, | 
					
						
							|  |  |  |                                  len(result)) | 
					
						
							|  |  |  |             if c == terminator: | 
					
						
							|  |  |  |                 if not result: | 
					
						
							|  |  |  |                     raise self.error("missing " + name, 1) | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             result += c | 
					
						
							|  |  |  |         return result | 
					
						
							|  |  |  |     @property | 
					
						
							|  |  |  |     def pos(self): | 
					
						
							|  |  |  |         return self.index - len(self.next or '') | 
					
						
							|  |  |  |     def tell(self): | 
					
						
							|  |  |  |         return self.index - len(self.next or '') | 
					
						
							|  |  |  |     def seek(self, index): | 
					
						
							|  |  |  |         self.index = index | 
					
						
							|  |  |  |         self.__next() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def error(self, msg, offset=0): | 
					
						
							|  |  |  |         return error(msg, self.string, self.tell() - offset) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _class_escape(source, escape): | 
					
						
							|  |  |  |     # handle escape code inside character class | 
					
						
							|  |  |  |     code = ESCAPES.get(escape) | 
					
						
							|  |  |  |     if code: | 
					
						
							|  |  |  |         return code | 
					
						
							|  |  |  |     code = CATEGORIES.get(escape) | 
					
						
							|  |  |  |     if code and code[0] is IN: | 
					
						
							|  |  |  |         return code | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         c = escape[1:2] | 
					
						
							|  |  |  |         if c == "x": | 
					
						
							|  |  |  |             # hexadecimal escape (exactly two digits) | 
					
						
							|  |  |  |             escape += source.getwhile(2, HEXDIGITS) | 
					
						
							|  |  |  |             if len(escape) != 4: | 
					
						
							|  |  |  |                 raise source.error("incomplete escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, int(escape[2:], 16) | 
					
						
							|  |  |  |         elif c == "u" and source.istext: | 
					
						
							|  |  |  |             # unicode escape (exactly four digits) | 
					
						
							|  |  |  |             escape += source.getwhile(4, HEXDIGITS) | 
					
						
							|  |  |  |             if len(escape) != 6: | 
					
						
							|  |  |  |                 raise source.error("incomplete escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, int(escape[2:], 16) | 
					
						
							|  |  |  |         elif c == "U" and source.istext: | 
					
						
							|  |  |  |             # unicode escape (exactly eight digits) | 
					
						
							|  |  |  |             escape += source.getwhile(8, HEXDIGITS) | 
					
						
							|  |  |  |             if len(escape) != 10: | 
					
						
							|  |  |  |                 raise source.error("incomplete escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             c = int(escape[2:], 16) | 
					
						
							|  |  |  |             chr(c) # raise ValueError for invalid code | 
					
						
							|  |  |  |             return LITERAL, c | 
					
						
							|  |  |  |         elif c == "N" and source.istext: | 
					
						
							|  |  |  |             import unicodedata | 
					
						
							|  |  |  |             # named unicode escape e.g. \N{EM DASH} | 
					
						
							|  |  |  |             if not source.match('{'): | 
					
						
							|  |  |  |                 raise source.error("missing {") | 
					
						
							|  |  |  |             charname = source.getuntil('}', 'character name') | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 c = ord(unicodedata.lookup(charname)) | 
					
						
							|  |  |  |             except KeyError: | 
					
						
							|  |  |  |                 raise source.error("undefined character name %r" % charname, | 
					
						
							| 
									
										
										
										
											2022-04-06 19:54:44 +03:00
										 |  |  |                                    len(charname) + len(r'\N{}')) from None | 
					
						
							| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  |             return LITERAL, c | 
					
						
							|  |  |  |         elif c in OCTDIGITS: | 
					
						
							|  |  |  |             # octal escape (up to three digits) | 
					
						
							|  |  |  |             escape += source.getwhile(2, OCTDIGITS) | 
					
						
							|  |  |  |             c = int(escape[1:], 8) | 
					
						
							|  |  |  |             if c > 0o377: | 
					
						
							|  |  |  |                 raise source.error('octal escape value %s outside of ' | 
					
						
							|  |  |  |                                    'range 0-0o377' % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, c | 
					
						
							|  |  |  |         elif c in DIGITS: | 
					
						
							|  |  |  |             raise ValueError | 
					
						
							|  |  |  |         if len(escape) == 2: | 
					
						
							|  |  |  |             if c in ASCIILETTERS: | 
					
						
							|  |  |  |                 raise source.error('bad escape %s' % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, ord(escape[1]) | 
					
						
							|  |  |  |     except ValueError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     raise source.error("bad escape %s" % escape, len(escape)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _escape(source, escape, state): | 
					
						
							|  |  |  |     # handle escape code in expression | 
					
						
							|  |  |  |     code = CATEGORIES.get(escape) | 
					
						
							|  |  |  |     if code: | 
					
						
							|  |  |  |         return code | 
					
						
							|  |  |  |     code = ESCAPES.get(escape) | 
					
						
							|  |  |  |     if code: | 
					
						
							|  |  |  |         return code | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         c = escape[1:2] | 
					
						
							|  |  |  |         if c == "x": | 
					
						
							|  |  |  |             # hexadecimal escape | 
					
						
							|  |  |  |             escape += source.getwhile(2, HEXDIGITS) | 
					
						
							|  |  |  |             if len(escape) != 4: | 
					
						
							|  |  |  |                 raise source.error("incomplete escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, int(escape[2:], 16) | 
					
						
							|  |  |  |         elif c == "u" and source.istext: | 
					
						
							|  |  |  |             # unicode escape (exactly four digits) | 
					
						
							|  |  |  |             escape += source.getwhile(4, HEXDIGITS) | 
					
						
							|  |  |  |             if len(escape) != 6: | 
					
						
							|  |  |  |                 raise source.error("incomplete escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, int(escape[2:], 16) | 
					
						
							|  |  |  |         elif c == "U" and source.istext: | 
					
						
							|  |  |  |             # unicode escape (exactly eight digits) | 
					
						
							|  |  |  |             escape += source.getwhile(8, HEXDIGITS) | 
					
						
							|  |  |  |             if len(escape) != 10: | 
					
						
							|  |  |  |                 raise source.error("incomplete escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             c = int(escape[2:], 16) | 
					
						
							|  |  |  |             chr(c) # raise ValueError for invalid code | 
					
						
							|  |  |  |             return LITERAL, c | 
					
						
							|  |  |  |         elif c == "N" and source.istext: | 
					
						
							|  |  |  |             import unicodedata | 
					
						
							|  |  |  |             # named unicode escape e.g. \N{EM DASH} | 
					
						
							|  |  |  |             if not source.match('{'): | 
					
						
							|  |  |  |                 raise source.error("missing {") | 
					
						
							|  |  |  |             charname = source.getuntil('}', 'character name') | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 c = ord(unicodedata.lookup(charname)) | 
					
						
							|  |  |  |             except KeyError: | 
					
						
							|  |  |  |                 raise source.error("undefined character name %r" % charname, | 
					
						
							| 
									
										
										
										
											2022-04-06 19:54:44 +03:00
										 |  |  |                                    len(charname) + len(r'\N{}')) from None | 
					
						
							| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  |             return LITERAL, c | 
					
						
							|  |  |  |         elif c == "0": | 
					
						
							|  |  |  |             # octal escape | 
					
						
							|  |  |  |             escape += source.getwhile(2, OCTDIGITS) | 
					
						
							|  |  |  |             return LITERAL, int(escape[1:], 8) | 
					
						
							|  |  |  |         elif c in DIGITS: | 
					
						
							|  |  |  |             # octal escape *or* decimal group reference (sigh) | 
					
						
							|  |  |  |             if source.next in DIGITS: | 
					
						
							|  |  |  |                 escape += source.get() | 
					
						
							|  |  |  |                 if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and | 
					
						
							|  |  |  |                     source.next in OCTDIGITS): | 
					
						
							|  |  |  |                     # got three octal digits; this is an octal escape | 
					
						
							|  |  |  |                     escape += source.get() | 
					
						
							|  |  |  |                     c = int(escape[1:], 8) | 
					
						
							|  |  |  |                     if c > 0o377: | 
					
						
							|  |  |  |                         raise source.error('octal escape value %s outside of ' | 
					
						
							|  |  |  |                                            'range 0-0o377' % escape, | 
					
						
							|  |  |  |                                            len(escape)) | 
					
						
							|  |  |  |                     return LITERAL, c | 
					
						
							|  |  |  |             # not an octal escape, so this is a group reference | 
					
						
							|  |  |  |             group = int(escape[1:]) | 
					
						
							|  |  |  |             if group < state.groups: | 
					
						
							|  |  |  |                 if not state.checkgroup(group): | 
					
						
							|  |  |  |                     raise source.error("cannot refer to an open group", | 
					
						
							|  |  |  |                                        len(escape)) | 
					
						
							|  |  |  |                 state.checklookbehindgroup(group, source) | 
					
						
							|  |  |  |                 return GROUPREF, group | 
					
						
							|  |  |  |             raise source.error("invalid group reference %d" % group, len(escape) - 1) | 
					
						
							|  |  |  |         if len(escape) == 2: | 
					
						
							|  |  |  |             if c in ASCIILETTERS: | 
					
						
							|  |  |  |                 raise source.error("bad escape %s" % escape, len(escape)) | 
					
						
							|  |  |  |             return LITERAL, ord(escape[1]) | 
					
						
							|  |  |  |     except ValueError: | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  |     raise source.error("bad escape %s" % escape, len(escape)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _uniq(items): | 
					
						
							|  |  |  |     return list(dict.fromkeys(items)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _parse_sub(source, state, verbose, nested): | 
					
						
							|  |  |  |     # parse an alternation: a|b|c | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     items = [] | 
					
						
							|  |  |  |     itemsappend = items.append | 
					
						
							|  |  |  |     sourcematch = source.match | 
					
						
							|  |  |  |     start = source.tell() | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         itemsappend(_parse(source, state, verbose, nested + 1, | 
					
						
							|  |  |  |                            not nested and not items)) | 
					
						
							|  |  |  |         if not sourcematch("|"): | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if len(items) == 1: | 
					
						
							|  |  |  |         return items[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     subpattern = SubPattern(state) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # check if all items share a common prefix | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         prefix = None | 
					
						
							|  |  |  |         for item in items: | 
					
						
							|  |  |  |             if not item: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             if prefix is None: | 
					
						
							|  |  |  |                 prefix = item[0] | 
					
						
							|  |  |  |             elif item[0] != prefix: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # all subitems start with a common "prefix". | 
					
						
							|  |  |  |             # move it out of the branch | 
					
						
							|  |  |  |             for item in items: | 
					
						
							|  |  |  |                 del item[0] | 
					
						
							|  |  |  |             subpattern.append(prefix) | 
					
						
							|  |  |  |             continue # check next one | 
					
						
							|  |  |  |         break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # check if the branch can be replaced by a character set | 
					
						
							|  |  |  |     set = [] | 
					
						
							|  |  |  |     for item in items: | 
					
						
							|  |  |  |         if len(item) != 1: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         op, av = item[0] | 
					
						
							|  |  |  |         if op is LITERAL: | 
					
						
							|  |  |  |             set.append((op, av)) | 
					
						
							|  |  |  |         elif op is IN and av[0][0] is not NEGATE: | 
					
						
							|  |  |  |             set.extend(av) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         # we can store this as a character set instead of a | 
					
						
							|  |  |  |         # branch (the compiler may optimize this even more) | 
					
						
							|  |  |  |         subpattern.append((IN, _uniq(set))) | 
					
						
							|  |  |  |         return subpattern | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     subpattern.append((BRANCH, (None, items))) | 
					
						
							|  |  |  |     return subpattern | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _parse(source, state, verbose, nested, first=False): | 
					
						
							|  |  |  |     # parse a simple pattern | 
					
						
							|  |  |  |     subpattern = SubPattern(state) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # precompute constants into local variables | 
					
						
							|  |  |  |     subpatternappend = subpattern.append | 
					
						
							|  |  |  |     sourceget = source.get | 
					
						
							|  |  |  |     sourcematch = source.match | 
					
						
							|  |  |  |     _len = len | 
					
						
							|  |  |  |     _ord = ord | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         this = source.next | 
					
						
							|  |  |  |         if this is None: | 
					
						
							|  |  |  |             break # end of pattern | 
					
						
							|  |  |  |         if this in "|)": | 
					
						
							|  |  |  |             break # end of subpattern | 
					
						
							|  |  |  |         sourceget() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if verbose: | 
					
						
							|  |  |  |             # skip whitespace and comments | 
					
						
							|  |  |  |             if this in WHITESPACE: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             if this == "#": | 
					
						
							|  |  |  |                 while True: | 
					
						
							|  |  |  |                     this = sourceget() | 
					
						
							|  |  |  |                     if this is None or this == "\n": | 
					
						
							|  |  |  |                         break | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if this[0] == "\\": | 
					
						
							|  |  |  |             code = _escape(source, this, state) | 
					
						
							|  |  |  |             subpatternappend(code) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this not in SPECIAL_CHARS: | 
					
						
							|  |  |  |             subpatternappend((LITERAL, _ord(this))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this == "[": | 
					
						
							|  |  |  |             here = source.tell() - 1 | 
					
						
							|  |  |  |             # character set | 
					
						
							|  |  |  |             set = [] | 
					
						
							|  |  |  |             setappend = set.append | 
					
						
							|  |  |  | ##          if sourcematch(":"): | 
					
						
							|  |  |  | ##              pass # handle character classes | 
					
						
							|  |  |  |             if source.next == '[': | 
					
						
							|  |  |  |                 import warnings | 
					
						
							|  |  |  |                 warnings.warn( | 
					
						
							|  |  |  |                     'Possible nested set at position %d' % source.tell(), | 
					
						
							|  |  |  |                     FutureWarning, stacklevel=nested + 6 | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             negate = sourcematch("^") | 
					
						
							|  |  |  |             # check remaining characters | 
					
						
							|  |  |  |             while True: | 
					
						
							|  |  |  |                 this = sourceget() | 
					
						
							|  |  |  |                 if this is None: | 
					
						
							|  |  |  |                     raise source.error("unterminated character set", | 
					
						
							|  |  |  |                                        source.tell() - here) | 
					
						
							|  |  |  |                 if this == "]" and set: | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  |                 elif this[0] == "\\": | 
					
						
							|  |  |  |                     code1 = _class_escape(source, this) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     if set and this in '-&~|' and source.next == this: | 
					
						
							|  |  |  |                         import warnings | 
					
						
							|  |  |  |                         warnings.warn( | 
					
						
							|  |  |  |                             'Possible set %s at position %d' % ( | 
					
						
							|  |  |  |                                 'difference' if this == '-' else | 
					
						
							|  |  |  |                                 'intersection' if this == '&' else | 
					
						
							|  |  |  |                                 'symmetric difference' if this == '~' else | 
					
						
							|  |  |  |                                 'union', | 
					
						
							|  |  |  |                                 source.tell() - 1), | 
					
						
							|  |  |  |                             FutureWarning, stacklevel=nested + 6 | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                     code1 = LITERAL, _ord(this) | 
					
						
							|  |  |  |                 if sourcematch("-"): | 
					
						
							|  |  |  |                     # potential range | 
					
						
							|  |  |  |                     that = sourceget() | 
					
						
							|  |  |  |                     if that is None: | 
					
						
							|  |  |  |                         raise source.error("unterminated character set", | 
					
						
							|  |  |  |                                            source.tell() - here) | 
					
						
							|  |  |  |                     if that == "]": | 
					
						
							|  |  |  |                         if code1[0] is IN: | 
					
						
							|  |  |  |                             code1 = code1[1][0] | 
					
						
							|  |  |  |                         setappend(code1) | 
					
						
							|  |  |  |                         setappend((LITERAL, _ord("-"))) | 
					
						
							|  |  |  |                         break | 
					
						
							|  |  |  |                     if that[0] == "\\": | 
					
						
							|  |  |  |                         code2 = _class_escape(source, that) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         if that == '-': | 
					
						
							|  |  |  |                             import warnings | 
					
						
							|  |  |  |                             warnings.warn( | 
					
						
							|  |  |  |                                 'Possible set difference at position %d' % ( | 
					
						
							|  |  |  |                                     source.tell() - 2), | 
					
						
							|  |  |  |                                 FutureWarning, stacklevel=nested + 6 | 
					
						
							|  |  |  |                             ) | 
					
						
							|  |  |  |                         code2 = LITERAL, _ord(that) | 
					
						
							|  |  |  |                     if code1[0] != LITERAL or code2[0] != LITERAL: | 
					
						
							|  |  |  |                         msg = "bad character range %s-%s" % (this, that) | 
					
						
							|  |  |  |                         raise source.error(msg, len(this) + 1 + len(that)) | 
					
						
							|  |  |  |                     lo = code1[1] | 
					
						
							|  |  |  |                     hi = code2[1] | 
					
						
							|  |  |  |                     if hi < lo: | 
					
						
							|  |  |  |                         msg = "bad character range %s-%s" % (this, that) | 
					
						
							|  |  |  |                         raise source.error(msg, len(this) + 1 + len(that)) | 
					
						
							|  |  |  |                     setappend((RANGE, (lo, hi))) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     if code1[0] is IN: | 
					
						
							|  |  |  |                         code1 = code1[1][0] | 
					
						
							|  |  |  |                     setappend(code1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             set = _uniq(set) | 
					
						
							|  |  |  |             # XXX: <fl> should move set optimization to compiler! | 
					
						
							|  |  |  |             if _len(set) == 1 and set[0][0] is LITERAL: | 
					
						
							|  |  |  |                 # optimization | 
					
						
							|  |  |  |                 if negate: | 
					
						
							|  |  |  |                     subpatternappend((NOT_LITERAL, set[0][1])) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     subpatternappend(set[0]) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 if negate: | 
					
						
							|  |  |  |                     set.insert(0, (NEGATE, None)) | 
					
						
							|  |  |  |                 # charmap optimization can't be added here because | 
					
						
							|  |  |  |                 # global flags still are not known | 
					
						
							|  |  |  |                 subpatternappend((IN, set)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this in REPEAT_CHARS: | 
					
						
							|  |  |  |             # repeat previous item | 
					
						
							|  |  |  |             here = source.tell() | 
					
						
							|  |  |  |             if this == "?": | 
					
						
							|  |  |  |                 min, max = 0, 1 | 
					
						
							|  |  |  |             elif this == "*": | 
					
						
							|  |  |  |                 min, max = 0, MAXREPEAT | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             elif this == "+": | 
					
						
							|  |  |  |                 min, max = 1, MAXREPEAT | 
					
						
							|  |  |  |             elif this == "{": | 
					
						
							|  |  |  |                 if source.next == "}": | 
					
						
							|  |  |  |                     subpatternappend((LITERAL, _ord(this))) | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 min, max = 0, MAXREPEAT | 
					
						
							|  |  |  |                 lo = hi = "" | 
					
						
							|  |  |  |                 while source.next in DIGITS: | 
					
						
							|  |  |  |                     lo += sourceget() | 
					
						
							|  |  |  |                 if sourcematch(","): | 
					
						
							|  |  |  |                     while source.next in DIGITS: | 
					
						
							|  |  |  |                         hi += sourceget() | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     hi = lo | 
					
						
							|  |  |  |                 if not sourcematch("}"): | 
					
						
							|  |  |  |                     subpatternappend((LITERAL, _ord(this))) | 
					
						
							|  |  |  |                     source.seek(here) | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 if lo: | 
					
						
							|  |  |  |                     min = int(lo) | 
					
						
							|  |  |  |                     if min >= MAXREPEAT: | 
					
						
							|  |  |  |                         raise OverflowError("the repetition number is too large") | 
					
						
							|  |  |  |                 if hi: | 
					
						
							|  |  |  |                     max = int(hi) | 
					
						
							|  |  |  |                     if max >= MAXREPEAT: | 
					
						
							|  |  |  |                         raise OverflowError("the repetition number is too large") | 
					
						
							|  |  |  |                     if max < min: | 
					
						
							|  |  |  |                         raise source.error("min repeat greater than max repeat", | 
					
						
							|  |  |  |                                            source.tell() - here) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 raise AssertionError("unsupported quantifier %r" % (char,)) | 
					
						
							|  |  |  |             # figure out which item to repeat | 
					
						
							|  |  |  |             if subpattern: | 
					
						
							|  |  |  |                 item = subpattern[-1:] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 item = None | 
					
						
							|  |  |  |             if not item or item[0][0] is AT: | 
					
						
							|  |  |  |                 raise source.error("nothing to repeat", | 
					
						
							|  |  |  |                                    source.tell() - here + len(this)) | 
					
						
							|  |  |  |             if item[0][0] in _REPEATCODES: | 
					
						
							|  |  |  |                 raise source.error("multiple repeat", | 
					
						
							|  |  |  |                                    source.tell() - here + len(this)) | 
					
						
							|  |  |  |             if item[0][0] is SUBPATTERN: | 
					
						
							|  |  |  |                 group, add_flags, del_flags, p = item[0][1] | 
					
						
							|  |  |  |                 if group is None and not add_flags and not del_flags: | 
					
						
							|  |  |  |                     item = p | 
					
						
							|  |  |  |             if sourcematch("?"): | 
					
						
							|  |  |  |                 # Non-Greedy Match | 
					
						
							|  |  |  |                 subpattern[-1] = (MIN_REPEAT, (min, max, item)) | 
					
						
							|  |  |  |             elif sourcematch("+"): | 
					
						
							|  |  |  |                 # Possessive Match (Always Greedy) | 
					
						
							|  |  |  |                 subpattern[-1] = (POSSESSIVE_REPEAT, (min, max, item)) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 # Greedy Match | 
					
						
							|  |  |  |                 subpattern[-1] = (MAX_REPEAT, (min, max, item)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this == ".": | 
					
						
							|  |  |  |             subpatternappend((ANY, None)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this == "(": | 
					
						
							|  |  |  |             start = source.tell() - 1 | 
					
						
							|  |  |  |             capture = True | 
					
						
							|  |  |  |             atomic = False | 
					
						
							|  |  |  |             name = None | 
					
						
							|  |  |  |             add_flags = 0 | 
					
						
							|  |  |  |             del_flags = 0 | 
					
						
							|  |  |  |             if sourcematch("?"): | 
					
						
							|  |  |  |                 # options | 
					
						
							|  |  |  |                 char = sourceget() | 
					
						
							|  |  |  |                 if char is None: | 
					
						
							|  |  |  |                     raise source.error("unexpected end of pattern") | 
					
						
							|  |  |  |                 if char == "P": | 
					
						
							|  |  |  |                     # python extensions | 
					
						
							|  |  |  |                     if sourcematch("<"): | 
					
						
							|  |  |  |                         # named group: skip forward to end of name | 
					
						
							|  |  |  |                         name = source.getuntil(">", "group name") | 
					
						
							|  |  |  |                         if not name.isidentifier(): | 
					
						
							|  |  |  |                             msg = "bad character in group name %r" % name | 
					
						
							|  |  |  |                             raise source.error(msg, len(name) + 1) | 
					
						
							|  |  |  |                     elif sourcematch("="): | 
					
						
							|  |  |  |                         # named backreference | 
					
						
							|  |  |  |                         name = source.getuntil(")", "group name") | 
					
						
							|  |  |  |                         if not name.isidentifier(): | 
					
						
							|  |  |  |                             msg = "bad character in group name %r" % name | 
					
						
							|  |  |  |                             raise source.error(msg, len(name) + 1) | 
					
						
							|  |  |  |                         gid = state.groupdict.get(name) | 
					
						
							|  |  |  |                         if gid is None: | 
					
						
							|  |  |  |                             msg = "unknown group name %r" % name | 
					
						
							|  |  |  |                             raise source.error(msg, len(name) + 1) | 
					
						
							|  |  |  |                         if not state.checkgroup(gid): | 
					
						
							|  |  |  |                             raise source.error("cannot refer to an open group", | 
					
						
							|  |  |  |                                                len(name) + 1) | 
					
						
							|  |  |  |                         state.checklookbehindgroup(gid, source) | 
					
						
							|  |  |  |                         subpatternappend((GROUPREF, gid)) | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         char = sourceget() | 
					
						
							|  |  |  |                         if char is None: | 
					
						
							|  |  |  |                             raise source.error("unexpected end of pattern") | 
					
						
							|  |  |  |                         raise source.error("unknown extension ?P" + char, | 
					
						
							|  |  |  |                                            len(char) + 2) | 
					
						
							|  |  |  |                 elif char == ":": | 
					
						
							|  |  |  |                     # non-capturing group | 
					
						
							|  |  |  |                     capture = False | 
					
						
							|  |  |  |                 elif char == "#": | 
					
						
							|  |  |  |                     # comment | 
					
						
							|  |  |  |                     while True: | 
					
						
							|  |  |  |                         if source.next is None: | 
					
						
							|  |  |  |                             raise source.error("missing ), unterminated comment", | 
					
						
							|  |  |  |                                                source.tell() - start) | 
					
						
							|  |  |  |                         if sourceget() == ")": | 
					
						
							|  |  |  |                             break | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 elif char in "=!<": | 
					
						
							|  |  |  |                     # lookahead assertions | 
					
						
							|  |  |  |                     dir = 1 | 
					
						
							|  |  |  |                     if char == "<": | 
					
						
							|  |  |  |                         char = sourceget() | 
					
						
							|  |  |  |                         if char is None: | 
					
						
							|  |  |  |                             raise source.error("unexpected end of pattern") | 
					
						
							|  |  |  |                         if char not in "=!": | 
					
						
							|  |  |  |                             raise source.error("unknown extension ?<" + char, | 
					
						
							|  |  |  |                                                len(char) + 2) | 
					
						
							|  |  |  |                         dir = -1 # lookbehind | 
					
						
							|  |  |  |                         lookbehindgroups = state.lookbehindgroups | 
					
						
							|  |  |  |                         if lookbehindgroups is None: | 
					
						
							|  |  |  |                             state.lookbehindgroups = state.groups | 
					
						
							|  |  |  |                     p = _parse_sub(source, state, verbose, nested + 1) | 
					
						
							|  |  |  |                     if dir < 0: | 
					
						
							|  |  |  |                         if lookbehindgroups is None: | 
					
						
							|  |  |  |                             state.lookbehindgroups = None | 
					
						
							|  |  |  |                     if not sourcematch(")"): | 
					
						
							|  |  |  |                         raise source.error("missing ), unterminated subpattern", | 
					
						
							|  |  |  |                                            source.tell() - start) | 
					
						
							|  |  |  |                     if char == "=": | 
					
						
							|  |  |  |                         subpatternappend((ASSERT, (dir, p))) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         subpatternappend((ASSERT_NOT, (dir, p))) | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 elif char == "(": | 
					
						
							|  |  |  |                     # conditional backreference group | 
					
						
							|  |  |  |                     condname = source.getuntil(")", "group name") | 
					
						
							|  |  |  |                     if condname.isidentifier(): | 
					
						
							|  |  |  |                         condgroup = state.groupdict.get(condname) | 
					
						
							|  |  |  |                         if condgroup is None: | 
					
						
							|  |  |  |                             msg = "unknown group name %r" % condname | 
					
						
							|  |  |  |                             raise source.error(msg, len(condname) + 1) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         try: | 
					
						
							|  |  |  |                             condgroup = int(condname) | 
					
						
							|  |  |  |                             if condgroup < 0: | 
					
						
							|  |  |  |                                 raise ValueError | 
					
						
							|  |  |  |                         except ValueError: | 
					
						
							|  |  |  |                             msg = "bad character in group name %r" % condname | 
					
						
							|  |  |  |                             raise source.error(msg, len(condname) + 1) from None | 
					
						
							|  |  |  |                         if not condgroup: | 
					
						
							|  |  |  |                             raise source.error("bad group number", | 
					
						
							|  |  |  |                                                len(condname) + 1) | 
					
						
							|  |  |  |                         if condgroup >= MAXGROUPS: | 
					
						
							|  |  |  |                             msg = "invalid group reference %d" % condgroup | 
					
						
							|  |  |  |                             raise source.error(msg, len(condname) + 1) | 
					
						
							|  |  |  |                     state.checklookbehindgroup(condgroup, source) | 
					
						
							|  |  |  |                     item_yes = _parse(source, state, verbose, nested + 1) | 
					
						
							|  |  |  |                     if source.match("|"): | 
					
						
							|  |  |  |                         item_no = _parse(source, state, verbose, nested + 1) | 
					
						
							|  |  |  |                         if source.next == "|": | 
					
						
							|  |  |  |                             raise source.error("conditional backref with more than two branches") | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         item_no = None | 
					
						
							|  |  |  |                     if not source.match(")"): | 
					
						
							|  |  |  |                         raise source.error("missing ), unterminated subpattern", | 
					
						
							|  |  |  |                                            source.tell() - start) | 
					
						
							|  |  |  |                     subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 elif char == ">": | 
					
						
							|  |  |  |                     # non-capturing, atomic group | 
					
						
							|  |  |  |                     capture = False | 
					
						
							|  |  |  |                     atomic = True | 
					
						
							|  |  |  |                 elif char in FLAGS or char == "-": | 
					
						
							|  |  |  |                     # flags | 
					
						
							|  |  |  |                     flags = _parse_flags(source, state, char) | 
					
						
							|  |  |  |                     if flags is None:  # global flags | 
					
						
							|  |  |  |                         if not first or subpattern: | 
					
						
							|  |  |  |                             raise source.error('global flags not at the start ' | 
					
						
							|  |  |  |                                                'of the expression', | 
					
						
							|  |  |  |                                                source.tell() - start) | 
					
						
							|  |  |  |                         if (state.flags & SRE_FLAG_VERBOSE) and not verbose: | 
					
						
							|  |  |  |                             raise Verbose | 
					
						
							|  |  |  |                         continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                     add_flags, del_flags = flags | 
					
						
							|  |  |  |                     capture = False | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     raise source.error("unknown extension ?" + char, | 
					
						
							|  |  |  |                                        len(char) + 1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # parse group contents | 
					
						
							|  |  |  |             if capture: | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     group = state.opengroup(name) | 
					
						
							|  |  |  |                 except error as err: | 
					
						
							|  |  |  |                     raise source.error(err.msg, len(name) + 1) from None | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 group = None | 
					
						
							|  |  |  |             sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and | 
					
						
							|  |  |  |                            not (del_flags & SRE_FLAG_VERBOSE)) | 
					
						
							|  |  |  |             p = _parse_sub(source, state, sub_verbose, nested + 1) | 
					
						
							|  |  |  |             if not source.match(")"): | 
					
						
							|  |  |  |                 raise source.error("missing ), unterminated subpattern", | 
					
						
							|  |  |  |                                    source.tell() - start) | 
					
						
							|  |  |  |             if group is not None: | 
					
						
							|  |  |  |                 state.closegroup(group, p) | 
					
						
							|  |  |  |             if atomic: | 
					
						
							|  |  |  |                 assert group is None | 
					
						
							|  |  |  |                 subpatternappend((ATOMIC_GROUP, p)) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this == "^": | 
					
						
							|  |  |  |             subpatternappend((AT, AT_BEGINNING)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         elif this == "$": | 
					
						
							|  |  |  |             subpatternappend((AT, AT_END)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             raise AssertionError("unsupported special character %r" % (char,)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # unpack non-capturing groups | 
					
						
							|  |  |  |     for i in range(len(subpattern))[::-1]: | 
					
						
							|  |  |  |         op, av = subpattern[i] | 
					
						
							|  |  |  |         if op is SUBPATTERN: | 
					
						
							|  |  |  |             group, add_flags, del_flags, p = av | 
					
						
							|  |  |  |             if group is None and not add_flags and not del_flags: | 
					
						
							|  |  |  |                 subpattern[i: i+1] = p | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return subpattern | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _parse_flags(source, state, char): | 
					
						
							|  |  |  |     sourceget = source.get | 
					
						
							|  |  |  |     add_flags = 0 | 
					
						
							|  |  |  |     del_flags = 0 | 
					
						
							|  |  |  |     if char != "-": | 
					
						
							|  |  |  |         while True: | 
					
						
							|  |  |  |             flag = FLAGS[char] | 
					
						
							|  |  |  |             if source.istext: | 
					
						
							|  |  |  |                 if char == 'L': | 
					
						
							|  |  |  |                     msg = "bad inline flags: cannot use 'L' flag with a str pattern" | 
					
						
							|  |  |  |                     raise source.error(msg) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 if char == 'u': | 
					
						
							|  |  |  |                     msg = "bad inline flags: cannot use 'u' flag with a bytes pattern" | 
					
						
							|  |  |  |                     raise source.error(msg) | 
					
						
							|  |  |  |             add_flags |= flag | 
					
						
							|  |  |  |             if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag: | 
					
						
							|  |  |  |                 msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible" | 
					
						
							|  |  |  |                 raise source.error(msg) | 
					
						
							|  |  |  |             char = sourceget() | 
					
						
							|  |  |  |             if char is None: | 
					
						
							|  |  |  |                 raise source.error("missing -, : or )") | 
					
						
							|  |  |  |             if char in ")-:": | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             if char not in FLAGS: | 
					
						
							|  |  |  |                 msg = "unknown flag" if char.isalpha() else "missing -, : or )" | 
					
						
							|  |  |  |                 raise source.error(msg, len(char)) | 
					
						
							|  |  |  |     if char == ")": | 
					
						
							|  |  |  |         state.flags |= add_flags | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  |     if add_flags & GLOBAL_FLAGS: | 
					
						
							|  |  |  |         raise source.error("bad inline flags: cannot turn on global flag", 1) | 
					
						
							|  |  |  |     if char == "-": | 
					
						
							|  |  |  |         char = sourceget() | 
					
						
							|  |  |  |         if char is None: | 
					
						
							|  |  |  |             raise source.error("missing flag") | 
					
						
							|  |  |  |         if char not in FLAGS: | 
					
						
							|  |  |  |             msg = "unknown flag" if char.isalpha() else "missing flag" | 
					
						
							|  |  |  |             raise source.error(msg, len(char)) | 
					
						
							|  |  |  |         while True: | 
					
						
							|  |  |  |             flag = FLAGS[char] | 
					
						
							|  |  |  |             if flag & TYPE_FLAGS: | 
					
						
							|  |  |  |                 msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'" | 
					
						
							|  |  |  |                 raise source.error(msg) | 
					
						
							|  |  |  |             del_flags |= flag | 
					
						
							|  |  |  |             char = sourceget() | 
					
						
							|  |  |  |             if char is None: | 
					
						
							|  |  |  |                 raise source.error("missing :") | 
					
						
							|  |  |  |             if char == ":": | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             if char not in FLAGS: | 
					
						
							|  |  |  |                 msg = "unknown flag" if char.isalpha() else "missing :" | 
					
						
							|  |  |  |                 raise source.error(msg, len(char)) | 
					
						
							|  |  |  |     assert char == ":" | 
					
						
							|  |  |  |     if del_flags & GLOBAL_FLAGS: | 
					
						
							|  |  |  |         raise source.error("bad inline flags: cannot turn off global flag", 1) | 
					
						
							|  |  |  |     if add_flags & del_flags: | 
					
						
							|  |  |  |         raise source.error("bad inline flags: flag turned on and off", 1) | 
					
						
							|  |  |  |     return add_flags, del_flags | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fix_flags(src, flags): | 
					
						
							|  |  |  |     # Check and fix flags according to the type of pattern (str or bytes) | 
					
						
							|  |  |  |     if isinstance(src, str): | 
					
						
							|  |  |  |         if flags & SRE_FLAG_LOCALE: | 
					
						
							|  |  |  |             raise ValueError("cannot use LOCALE flag with a str pattern") | 
					
						
							|  |  |  |         if not flags & SRE_FLAG_ASCII: | 
					
						
							|  |  |  |             flags |= SRE_FLAG_UNICODE | 
					
						
							|  |  |  |         elif flags & SRE_FLAG_UNICODE: | 
					
						
							|  |  |  |             raise ValueError("ASCII and UNICODE flags are incompatible") | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         if flags & SRE_FLAG_UNICODE: | 
					
						
							|  |  |  |             raise ValueError("cannot use UNICODE flag with a bytes pattern") | 
					
						
							|  |  |  |         if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: | 
					
						
							|  |  |  |             raise ValueError("ASCII and LOCALE flags are incompatible") | 
					
						
							|  |  |  |     return flags | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse(str, flags=0, state=None): | 
					
						
							|  |  |  |     # parse 're' pattern into list of (opcode, argument) tuples | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     source = Tokenizer(str) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if state is None: | 
					
						
							|  |  |  |         state = State() | 
					
						
							|  |  |  |     state.flags = flags | 
					
						
							|  |  |  |     state.str = str | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0) | 
					
						
							|  |  |  |     except Verbose: | 
					
						
							|  |  |  |         # the VERBOSE flag was switched on inside the pattern.  to be | 
					
						
							|  |  |  |         # on the safe side, we'll parse the whole thing again... | 
					
						
							|  |  |  |         state = State() | 
					
						
							|  |  |  |         state.flags = flags | SRE_FLAG_VERBOSE | 
					
						
							|  |  |  |         state.str = str | 
					
						
							|  |  |  |         source.seek(0) | 
					
						
							|  |  |  |         p = _parse_sub(source, state, True, 0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     p.state.flags = fix_flags(str, p.state.flags) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if source.next is not None: | 
					
						
							|  |  |  |         assert source.next == ")" | 
					
						
							|  |  |  |         raise source.error("unbalanced parenthesis") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if flags & SRE_FLAG_DEBUG: | 
					
						
							|  |  |  |         p.dump() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return p | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_template(source, state): | 
					
						
							|  |  |  |     # parse 're' replacement string into list of literals and | 
					
						
							|  |  |  |     # group references | 
					
						
							|  |  |  |     s = Tokenizer(source) | 
					
						
							|  |  |  |     sget = s.get | 
					
						
							|  |  |  |     groups = [] | 
					
						
							|  |  |  |     literals = [] | 
					
						
							|  |  |  |     literal = [] | 
					
						
							|  |  |  |     lappend = literal.append | 
					
						
							|  |  |  |     def addgroup(index, pos): | 
					
						
							|  |  |  |         if index > state.groups: | 
					
						
							|  |  |  |             raise s.error("invalid group reference %d" % index, pos) | 
					
						
							|  |  |  |         if literal: | 
					
						
							|  |  |  |             literals.append(''.join(literal)) | 
					
						
							|  |  |  |             del literal[:] | 
					
						
							|  |  |  |         groups.append((len(literals), index)) | 
					
						
							|  |  |  |         literals.append(None) | 
					
						
							|  |  |  |     groupindex = state.groupindex | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         this = sget() | 
					
						
							|  |  |  |         if this is None: | 
					
						
							|  |  |  |             break # end of replacement string | 
					
						
							|  |  |  |         if this[0] == "\\": | 
					
						
							|  |  |  |             # group | 
					
						
							|  |  |  |             c = this[1] | 
					
						
							|  |  |  |             if c == "g": | 
					
						
							|  |  |  |                 name = "" | 
					
						
							|  |  |  |                 if not s.match("<"): | 
					
						
							|  |  |  |                     raise s.error("missing <") | 
					
						
							|  |  |  |                 name = s.getuntil(">", "group name") | 
					
						
							|  |  |  |                 if name.isidentifier(): | 
					
						
							|  |  |  |                     try: | 
					
						
							|  |  |  |                         index = groupindex[name] | 
					
						
							|  |  |  |                     except KeyError: | 
					
						
							| 
									
										
										
										
											2022-04-06 19:54:44 +03:00
										 |  |  |                         raise IndexError("unknown group name %r" % name) from None | 
					
						
							| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     try: | 
					
						
							|  |  |  |                         index = int(name) | 
					
						
							|  |  |  |                         if index < 0: | 
					
						
							|  |  |  |                             raise ValueError | 
					
						
							|  |  |  |                     except ValueError: | 
					
						
							|  |  |  |                         raise s.error("bad character in group name %r" % name, | 
					
						
							|  |  |  |                                       len(name) + 1) from None | 
					
						
							|  |  |  |                     if index >= MAXGROUPS: | 
					
						
							|  |  |  |                         raise s.error("invalid group reference %d" % index, | 
					
						
							|  |  |  |                                       len(name) + 1) | 
					
						
							|  |  |  |                 addgroup(index, len(name) + 1) | 
					
						
							|  |  |  |             elif c == "0": | 
					
						
							|  |  |  |                 if s.next in OCTDIGITS: | 
					
						
							|  |  |  |                     this += sget() | 
					
						
							|  |  |  |                     if s.next in OCTDIGITS: | 
					
						
							|  |  |  |                         this += sget() | 
					
						
							|  |  |  |                 lappend(chr(int(this[1:], 8) & 0xff)) | 
					
						
							|  |  |  |             elif c in DIGITS: | 
					
						
							|  |  |  |                 isoctal = False | 
					
						
							|  |  |  |                 if s.next in DIGITS: | 
					
						
							|  |  |  |                     this += sget() | 
					
						
							|  |  |  |                     if (c in OCTDIGITS and this[2] in OCTDIGITS and | 
					
						
							|  |  |  |                         s.next in OCTDIGITS): | 
					
						
							|  |  |  |                         this += sget() | 
					
						
							|  |  |  |                         isoctal = True | 
					
						
							|  |  |  |                         c = int(this[1:], 8) | 
					
						
							|  |  |  |                         if c > 0o377: | 
					
						
							|  |  |  |                             raise s.error('octal escape value %s outside of ' | 
					
						
							|  |  |  |                                           'range 0-0o377' % this, len(this)) | 
					
						
							|  |  |  |                         lappend(chr(c)) | 
					
						
							|  |  |  |                 if not isoctal: | 
					
						
							|  |  |  |                     addgroup(int(this[1:]), len(this) - 1) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     this = chr(ESCAPES[this][1]) | 
					
						
							|  |  |  |                 except KeyError: | 
					
						
							|  |  |  |                     if c in ASCIILETTERS: | 
					
						
							| 
									
										
										
										
											2022-04-06 19:54:44 +03:00
										 |  |  |                         raise s.error('bad escape %s' % this, len(this)) from None | 
					
						
							| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  |                 lappend(this) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             lappend(this) | 
					
						
							|  |  |  |     if literal: | 
					
						
							|  |  |  |         literals.append(''.join(literal)) | 
					
						
							|  |  |  |     if not isinstance(source, str): | 
					
						
							|  |  |  |         # The tokenizer implicitly decodes bytes objects as latin-1, we must | 
					
						
							|  |  |  |         # therefore re-encode the final representation. | 
					
						
							|  |  |  |         literals = [None if s is None else s.encode('latin-1') for s in literals] | 
					
						
							|  |  |  |     return groups, literals | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def expand_template(template, match): | 
					
						
							|  |  |  |     g = match.group | 
					
						
							|  |  |  |     empty = match.string[:0] | 
					
						
							|  |  |  |     groups, literals = template | 
					
						
							|  |  |  |     literals = literals[:] | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         for index, group in groups: | 
					
						
							|  |  |  |             literals[index] = g(group) or empty | 
					
						
							|  |  |  |     except IndexError: | 
					
						
							| 
									
										
										
										
											2022-04-06 19:54:44 +03:00
										 |  |  |         raise error("invalid group reference %d" % index) from None | 
					
						
							| 
									
										
										
										
											2022-04-02 11:35:13 +03:00
										 |  |  |     return empty.join(literals) |