mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Applying a modified version of patch #1018386, which fixes
some escaping bugs in SRE.
This commit is contained in:
		
							parent
							
								
									ab9351bf36
								
							
						
					
					
						commit
						a01a2ee933
					
				
					 3 changed files with 91 additions and 43 deletions
				
			
		|  | @ -387,7 +387,8 @@ also accepted by the regular expression parser: | |||
| 
 | ||||
| Octal escapes are included in a limited form: If the first digit is a | ||||
| 0, or if there are three octal digits, it is considered an octal | ||||
| escape. Otherwise, it is a group reference. | ||||
| escape. Otherwise, it is a group reference.  As for string literals, | ||||
| octal escapes are always at most three digits in length. | ||||
| 
 | ||||
| 
 | ||||
| % Note the lack of a period in the section title; it causes problems | ||||
|  |  | |||
|  | @ -217,21 +217,11 @@ def isname(name): | |||
|     # check that group name is a valid string | ||||
|     if not isident(name[0]): | ||||
|         return False | ||||
|     for char in name: | ||||
|     for char in name[1:]: | ||||
|         if not isident(char) and not isdigit(char): | ||||
|             return False | ||||
|     return True | ||||
| 
 | ||||
| def _group(escape, groups): | ||||
|     # check if the escape string represents a valid group | ||||
|     try: | ||||
|         gid = int(escape[1:]) | ||||
|         if gid and gid < groups: | ||||
|             return gid | ||||
|     except ValueError: | ||||
|         pass | ||||
|     return None # not a valid group | ||||
| 
 | ||||
| def _class_escape(source, escape): | ||||
|     # handle escape code inside character class | ||||
|     code = ESCAPES.get(escape) | ||||
|  | @ -241,7 +231,8 @@ def _class_escape(source, escape): | |||
|     if code: | ||||
|         return code | ||||
|     try: | ||||
|         if escape[1:2] == "x": | ||||
|         c = escape[1:2] | ||||
|         if c == "x": | ||||
|             # hexadecimal escape (exactly two digits) | ||||
|             while source.next in HEXDIGITS and len(escape) < 4: | ||||
|                 escape = escape + source.get() | ||||
|  | @ -249,12 +240,14 @@ def _class_escape(source, escape): | |||
|             if len(escape) != 2: | ||||
|                 raise error, "bogus escape: %s" % repr("\\" + escape) | ||||
|             return LITERAL, int(escape, 16) & 0xff | ||||
|         elif escape[1:2] in OCTDIGITS: | ||||
|         elif c in OCTDIGITS: | ||||
|             # octal escape (up to three digits) | ||||
|             while source.next in OCTDIGITS and len(escape) < 5: | ||||
|             while source.next in OCTDIGITS and len(escape) < 4: | ||||
|                 escape = escape + source.get() | ||||
|             escape = escape[1:] | ||||
|             return LITERAL, int(escape, 8) & 0xff | ||||
|         elif c in DIGITS: | ||||
|             raise error, "bogus escape: %s" % repr(escape) | ||||
|         if len(escape) == 2: | ||||
|             return LITERAL, ord(escape[1]) | ||||
|     except ValueError: | ||||
|  | @ -270,19 +263,20 @@ def _escape(source, escape, state): | |||
|     if code: | ||||
|         return code | ||||
|     try: | ||||
|         if escape[1:2] == "x": | ||||
|         c = escape[1:2] | ||||
|         if c == "x": | ||||
|             # hexadecimal escape | ||||
|             while source.next in HEXDIGITS and len(escape) < 4: | ||||
|                 escape = escape + source.get() | ||||
|             if len(escape) != 4: | ||||
|                 raise ValueError | ||||
|             return LITERAL, int(escape[2:], 16) & 0xff | ||||
|         elif escape[1:2] == "0": | ||||
|         elif c == "0": | ||||
|             # octal escape | ||||
|             while source.next in OCTDIGITS and len(escape) < 4: | ||||
|                 escape = escape + source.get() | ||||
|             return LITERAL, int(escape[1:], 8) & 0xff | ||||
|         elif escape[1:2] in DIGITS: | ||||
|         elif c in DIGITS: | ||||
|             # octal escape *or* decimal group reference (sigh) | ||||
|             if source.next in DIGITS: | ||||
|                 escape = escape + source.get() | ||||
|  | @ -291,9 +285,9 @@ def _escape(source, escape, state): | |||
|                     # got three octal digits; this is an octal escape | ||||
|                     escape = escape + source.get() | ||||
|                     return LITERAL, int(escape[1:], 8) & 0xff | ||||
|             # got at least one decimal digit; this is a group reference | ||||
|             group = _group(escape, state.groups) | ||||
|             if group: | ||||
|             # not an octal escape, so this is a group reference | ||||
|             group = int(escape[1:]) | ||||
|             if group < state.groups: | ||||
|                 if not state.checkgroup(group): | ||||
|                     raise error, "cannot refer to open group" | ||||
|                 return GROUPREF, group | ||||
|  | @ -709,7 +703,8 @@ def literal(literal, p=p, pappend=a): | |||
|             break # end of replacement string | ||||
|         if this and this[0] == "\\": | ||||
|             # group | ||||
|             if this == "\\g": | ||||
|             c = this[1:2] | ||||
|             if c == "g": | ||||
|                 name = "" | ||||
|                 if s.match("<"): | ||||
|                     while 1: | ||||
|  | @ -723,6 +718,8 @@ def literal(literal, p=p, pappend=a): | |||
|                     raise error, "bad group name" | ||||
|                 try: | ||||
|                     index = int(name) | ||||
|                     if index < 0: | ||||
|                         raise error, "negative group number" | ||||
|                 except ValueError: | ||||
|                     if not isname(name): | ||||
|                         raise error, "bad character in group name" | ||||
|  | @ -731,26 +728,23 @@ def literal(literal, p=p, pappend=a): | |||
|                     except KeyError: | ||||
|                         raise IndexError, "unknown group name" | ||||
|                 a((MARK, index)) | ||||
|             elif len(this) > 1 and this[1] in DIGITS: | ||||
|                 code = None | ||||
|                 while 1: | ||||
|                     group = _group(this, pattern.groups+1) | ||||
|                     if group: | ||||
|                         if (s.next not in DIGITS or | ||||
|                             not _group(this + s.next, pattern.groups+1)): | ||||
|                             code = MARK, group | ||||
|                             break | ||||
|                     elif s.next in OCTDIGITS: | ||||
|             elif c == "0": | ||||
|                 if s.next in OCTDIGITS: | ||||
|                     this = this + sget() | ||||
|                     if s.next in OCTDIGITS: | ||||
|                         this = this + sget() | ||||
|                     else: | ||||
|                         break | ||||
|                 if not code: | ||||
|                     this = this[1:] | ||||
|                     code = LITERAL, makechar(int(this[-6:], 8) & 0xff) | ||||
|                 if code[0] is LITERAL: | ||||
|                     literal(code[1]) | ||||
|                 else: | ||||
|                     a(code) | ||||
|                 literal(makechar(int(this[1:], 8) & 0xff)) | ||||
|             elif c in DIGITS: | ||||
|                 isoctal = False | ||||
|                 if s.next in DIGITS: | ||||
|                     this = this + sget() | ||||
|                     if (c in OCTDIGITS and s.next in OCTDIGITS and | ||||
|                         this[2] in OCTDIGITS): | ||||
|                         this = this + sget() | ||||
|                         isoctal = True | ||||
|                         literal(makechar(int(this[1:], 8) & 0xff)) | ||||
|                 if not isoctal: | ||||
|                     a((MARK, int(this[1:]))) | ||||
|             else: | ||||
|                 try: | ||||
|                     this = makechar(ESCAPES[this][1]) | ||||
|  | @ -782,7 +776,7 @@ def expand_template(template, match): | |||
|         for index, group in groups: | ||||
|             literals[index] = s = g(group) | ||||
|             if s is None: | ||||
|                 raise IndexError | ||||
|                 raise error, "unmatched group" | ||||
|     except IndexError: | ||||
|         raise error, "empty group" | ||||
|         raise error, "invalid group reference" | ||||
|     return sep.join(literals) | ||||
|  |  | |||
|  | @ -83,6 +83,48 @@ def test_bug_449000(self): | |||
|         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), | ||||
|                          'abc\ndef\n') | ||||
| 
 | ||||
|     def test_sub_template_numeric_escape(self): | ||||
|         # bug 776311 and friends | ||||
|         self.assertEqual(re.sub('x', r'\0', 'x'), '\0') | ||||
|         self.assertEqual(re.sub('x', r'\000', 'x'), '\000') | ||||
|         self.assertEqual(re.sub('x', r'\001', 'x'), '\001') | ||||
|         self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') | ||||
|         self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') | ||||
|         self.assertEqual(re.sub('x', r'\111', 'x'), '\111') | ||||
|         self.assertEqual(re.sub('x', r'\117', 'x'), '\117') | ||||
| 
 | ||||
|         self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') | ||||
|         self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') | ||||
| 
 | ||||
|         self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') | ||||
|         self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') | ||||
|         self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') | ||||
|         self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') | ||||
|         self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') | ||||
| 
 | ||||
|         self.assertEqual(re.sub('x', r'\400', 'x'), '\0') | ||||
|         self.assertEqual(re.sub('x', r'\777', 'x'), '\377') | ||||
|          | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\1', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\9', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\11', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\18', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\90', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\99', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x') | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' | ||||
|         self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' | ||||
| 
 | ||||
|         # in python2.3 (etc), these loop endlessly in sre_parse.py | ||||
|         self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') | ||||
|         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), | ||||
|                          'xz8') | ||||
|         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), | ||||
|                          'xza') | ||||
| 
 | ||||
|     def test_qualified_re_sub(self): | ||||
|         self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') | ||||
|         self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') | ||||
|  | @ -105,6 +147,7 @@ def test_symbolic_refs(self): | |||
|         self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') | ||||
|         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') | ||||
|         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') | ||||
|         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') | ||||
| 
 | ||||
|     def test_re_subn(self): | ||||
|         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) | ||||
|  | @ -386,6 +429,16 @@ def test_sre_character_literals(self): | |||
|             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None) | ||||
|         self.assertRaises(re.error, re.match, "\911", "") | ||||
| 
 | ||||
|     def test_sre_character_class_literals(self): | ||||
|         for i in [0, 8, 16, 32, 64, 127, 128, 255]: | ||||
|             self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None) | ||||
|             self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None) | ||||
|             self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None) | ||||
|             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) | ||||
|             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) | ||||
|             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) | ||||
|         self.assertRaises(re.error, re.match, "[\911]", "") | ||||
| 
 | ||||
|     def test_bug_113254(self): | ||||
|         self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) | ||||
|         self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Gustavo Niemeyer
						Gustavo Niemeyer