mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	- untabified sre_constants.py
This commit is contained in:
		
							parent
							
								
									a4657f736c
								
							
						
					
					
						commit
						43b3b49b5a
					
				
					 6 changed files with 146 additions and 90 deletions
				
			
		|  | @ -26,52 +26,12 @@ def _compile(code, pattern, flags): | ||||||
|     # internal: compile a (sub)pattern |     # internal: compile a (sub)pattern | ||||||
|     emit = code.append |     emit = code.append | ||||||
|     for op, av in pattern: |     for op, av in pattern: | ||||||
|         if op is ANY: |         if op in (LITERAL, NOT_LITERAL): | ||||||
|             if flags & SRE_FLAG_DOTALL: |  | ||||||
|                 emit(OPCODES[op]) |  | ||||||
|             else: |  | ||||||
|                 emit(OPCODES[CATEGORY]) |  | ||||||
|                 emit(CHCODES[CATEGORY_NOT_LINEBREAK]) |  | ||||||
|         elif op in (SUCCESS, FAILURE): |  | ||||||
|             emit(OPCODES[op]) |  | ||||||
|         elif op is AT: |  | ||||||
|             emit(OPCODES[op]) |  | ||||||
|             if flags & SRE_FLAG_MULTILINE: |  | ||||||
|                 emit(ATCODES[AT_MULTILINE[av]]) |  | ||||||
|             else: |  | ||||||
|                 emit(ATCODES[av]) |  | ||||||
|         elif op is BRANCH: |  | ||||||
|             emit(OPCODES[op]) |  | ||||||
|             tail = [] |  | ||||||
|             for av in av[1]: |  | ||||||
|                 skip = len(code); emit(0) |  | ||||||
|                 _compile(code, av, flags) |  | ||||||
|                 emit(OPCODES[JUMP]) |  | ||||||
|                 tail.append(len(code)); emit(0) |  | ||||||
|                 code[skip] = len(code) - skip |  | ||||||
|             emit(0) # end of branch |  | ||||||
|             for tail in tail: |  | ||||||
|                 code[tail] = len(code) - tail |  | ||||||
|         elif op is CALL: |  | ||||||
|             emit(OPCODES[op]) |  | ||||||
|             skip = len(code); emit(0) |  | ||||||
|             _compile(code, av, flags) |  | ||||||
|             emit(OPCODES[SUCCESS]) |  | ||||||
|             code[skip] = len(code) - skip |  | ||||||
|         elif op is CATEGORY: |  | ||||||
|             emit(OPCODES[op]) |  | ||||||
|             if flags & SRE_FLAG_LOCALE: |  | ||||||
|                 emit(CHCODES[CH_LOCALE[av]]) |  | ||||||
|             elif flags & SRE_FLAG_UNICODE: |  | ||||||
|                 emit(CHCODES[CH_UNICODE[av]]) |  | ||||||
|             else: |  | ||||||
|                 emit(CHCODES[av]) |  | ||||||
|         elif op is GROUP: |  | ||||||
|             if flags & SRE_FLAG_IGNORECASE: |             if flags & SRE_FLAG_IGNORECASE: | ||||||
|                 emit(OPCODES[OP_IGNORE[op]]) |                 emit(OPCODES[OP_IGNORE[op]]) | ||||||
|             else: |             else: | ||||||
|                 emit(OPCODES[op]) |                 emit(OPCODES[op]) | ||||||
|             emit(av-1) |             emit(ord(av)) | ||||||
|         elif op is IN: |         elif op is IN: | ||||||
|             if flags & SRE_FLAG_IGNORECASE: |             if flags & SRE_FLAG_IGNORECASE: | ||||||
|                 emit(OPCODES[OP_IGNORE[op]]) |                 emit(OPCODES[OP_IGNORE[op]]) | ||||||
|  | @ -101,15 +61,12 @@ def fixup(literal, flags=flags): | ||||||
|                     raise error, "internal: unsupported set operator" |                     raise error, "internal: unsupported set operator" | ||||||
|             emit(OPCODES[FAILURE]) |             emit(OPCODES[FAILURE]) | ||||||
|             code[skip] = len(code) - skip |             code[skip] = len(code) - skip | ||||||
|         elif op in (LITERAL, NOT_LITERAL): |         elif op is ANY: | ||||||
|             if flags & SRE_FLAG_IGNORECASE: |             if flags & SRE_FLAG_DOTALL: | ||||||
|                 emit(OPCODES[OP_IGNORE[op]]) |  | ||||||
|             else: |  | ||||||
|                 emit(OPCODES[op]) |                 emit(OPCODES[op]) | ||||||
|             emit(ord(av)) |             else: | ||||||
|         elif op is MARK: |                 emit(OPCODES[CATEGORY]) | ||||||
|             emit(OPCODES[op]) |                 emit(CHCODES[CATEGORY_NOT_LINEBREAK]) | ||||||
|             emit(av) |  | ||||||
|         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): |         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): | ||||||
|             if flags & SRE_FLAG_TEMPLATE: |             if flags & SRE_FLAG_TEMPLATE: | ||||||
|                 emit(OPCODES[REPEAT]) |                 emit(OPCODES[REPEAT]) | ||||||
|  | @ -150,6 +107,49 @@ def fixup(literal, flags=flags): | ||||||
|             if group: |             if group: | ||||||
|                 emit(OPCODES[MARK]) |                 emit(OPCODES[MARK]) | ||||||
|                 emit((group-1)*2+1) |                 emit((group-1)*2+1) | ||||||
|  |         elif op in (SUCCESS, FAILURE): | ||||||
|  |             emit(OPCODES[op]) | ||||||
|  |         elif op in (ASSERT, ASSERT_NOT, CALL): | ||||||
|  |             emit(OPCODES[op]) | ||||||
|  |             skip = len(code); emit(0) | ||||||
|  |             _compile(code, av, flags) | ||||||
|  |             emit(OPCODES[SUCCESS]) | ||||||
|  |             code[skip] = len(code) - skip | ||||||
|  |         elif op is AT: | ||||||
|  |             emit(OPCODES[op]) | ||||||
|  |             if flags & SRE_FLAG_MULTILINE: | ||||||
|  |                 emit(ATCODES[AT_MULTILINE[av]]) | ||||||
|  |             else: | ||||||
|  |                 emit(ATCODES[av]) | ||||||
|  |         elif op is BRANCH: | ||||||
|  |             emit(OPCODES[op]) | ||||||
|  |             tail = [] | ||||||
|  |             for av in av[1]: | ||||||
|  |                 skip = len(code); emit(0) | ||||||
|  |                 _compile(code, av, flags) | ||||||
|  |                 emit(OPCODES[JUMP]) | ||||||
|  |                 tail.append(len(code)); emit(0) | ||||||
|  |                 code[skip] = len(code) - skip | ||||||
|  |             emit(0) # end of branch | ||||||
|  |             for tail in tail: | ||||||
|  |                 code[tail] = len(code) - tail | ||||||
|  |         elif op is CATEGORY: | ||||||
|  |             emit(OPCODES[op]) | ||||||
|  |             if flags & SRE_FLAG_LOCALE: | ||||||
|  |                 emit(CHCODES[CH_LOCALE[av]]) | ||||||
|  |             elif flags & SRE_FLAG_UNICODE: | ||||||
|  |                 emit(CHCODES[CH_UNICODE[av]]) | ||||||
|  |             else: | ||||||
|  |                 emit(CHCODES[av]) | ||||||
|  |         elif op is GROUP: | ||||||
|  |             if flags & SRE_FLAG_IGNORECASE: | ||||||
|  |                 emit(OPCODES[OP_IGNORE[op]]) | ||||||
|  |             else: | ||||||
|  |                 emit(OPCODES[op]) | ||||||
|  |             emit(av-1) | ||||||
|  |         elif op is MARK: | ||||||
|  |             emit(OPCODES[op]) | ||||||
|  |             emit(av) | ||||||
|         else: |         else: | ||||||
|             raise ValueError, ("unsupported operand type", op) |             raise ValueError, ("unsupported operand type", op) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -23,6 +23,7 @@ class error(Exception): | ||||||
| 
 | 
 | ||||||
| ANY = "any" | ANY = "any" | ||||||
| ASSERT = "assert" | ASSERT = "assert" | ||||||
|  | ASSERT_NOT = "assert_not" | ||||||
| AT = "at" | AT = "at" | ||||||
| BRANCH = "branch" | BRANCH = "branch" | ||||||
| CALL = "call" | CALL = "call" | ||||||
|  | @ -81,7 +82,7 @@ class error(Exception): | ||||||
|     FAILURE, SUCCESS, |     FAILURE, SUCCESS, | ||||||
| 
 | 
 | ||||||
|     ANY, |     ANY, | ||||||
|     ASSERT, |     ASSERT, ASSERT_NOT, | ||||||
|     AT, |     AT, | ||||||
|     BRANCH, |     BRANCH, | ||||||
|     CALL, |     CALL, | ||||||
|  | @ -121,8 +122,8 @@ def makedict(list): | ||||||
|     d = {} |     d = {} | ||||||
|     i = 0 |     i = 0 | ||||||
|     for item in list: |     for item in list: | ||||||
| 	d[item] = i |         d[item] = i | ||||||
| 	i = i + 1 |         i = i + 1 | ||||||
|     return d |     return d | ||||||
| 
 | 
 | ||||||
| OPCODES = makedict(OPCODES) | OPCODES = makedict(OPCODES) | ||||||
|  | @ -176,12 +177,27 @@ def makedict(list): | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     import string |     import string | ||||||
|     def dump(f, d, prefix): |     def dump(f, d, prefix): | ||||||
| 	items = d.items() |         items = d.items() | ||||||
| 	items.sort(lambda a, b: cmp(a[1], b[1])) |         items.sort(lambda a, b: cmp(a[1], b[1])) | ||||||
| 	for k, v in items: |         for k, v in items: | ||||||
| 	    f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v)) |             f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v)) | ||||||
|     f = open("sre_constants.h", "w") |     f = open("sre_constants.h", "w") | ||||||
|     f.write("/* generated from sre_constants.py */\n") |     f.write("""\ | ||||||
|  | /* | ||||||
|  |  * Secret Labs' Regular Expression Engine | ||||||
|  |  * | ||||||
|  |  * regular expression matching engine | ||||||
|  |  * | ||||||
|  |  * NOTE: This file is generated by sre_constants.py.  If you need | ||||||
|  |  * to change anything in here, edit sre_constants.py and run it. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved. | ||||||
|  |  * | ||||||
|  |  * See the _sre.c file for information on usage and redistribution. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | """) | ||||||
|  | 
 | ||||||
|     dump(f, OPCODES, "SRE_OP") |     dump(f, OPCODES, "SRE_OP") | ||||||
|     dump(f, ATCODES, "SRE") |     dump(f, ATCODES, "SRE") | ||||||
|     dump(f, CHCODES, "SRE") |     dump(f, CHCODES, "SRE") | ||||||
|  |  | ||||||
|  | @ -470,6 +470,25 @@ def _parse(source, state, flags=0): | ||||||
|                         if source.next is None or source.next == ")": |                         if source.next is None or source.next == ")": | ||||||
|                             break |                             break | ||||||
|                         source.get() |                         source.get() | ||||||
|  |                 elif source.next in ("=", "!"): | ||||||
|  |                     # lookahead assertions | ||||||
|  |                     char = source.get() | ||||||
|  |                     b = [] | ||||||
|  |                     while 1: | ||||||
|  |                         p = _parse(source, state, flags) | ||||||
|  |                         if source.next == ")": | ||||||
|  |                             if b: | ||||||
|  |                                 b.append(p) | ||||||
|  |                                 p = _branch(state, b) | ||||||
|  |                             if char == "=": | ||||||
|  |                                 subpattern.append((ASSERT, p)) | ||||||
|  |                             else: | ||||||
|  |                                 subpattern.append((ASSERT_NOT, p)) | ||||||
|  |                             break | ||||||
|  |                         elif source.match("|"): | ||||||
|  |                             b.append(p) | ||||||
|  |                         else: | ||||||
|  |                             raise error, "pattern not properly closed" | ||||||
|                 else: |                 else: | ||||||
|                     # flags |                     # flags | ||||||
|                     while FLAGS.has_key(source.next): |                     while FLAGS.has_key(source.next): | ||||||
|  |  | ||||||
|  | @ -6,7 +6,4 @@ test_support -- test failed re module cPickle | ||||||
| === grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/' | === grouping error ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', 0, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/') 'd:msgs/tdir/sub1/-trial/' should be 'd:msgs/tdir/sub1/-tdir/' | ||||||
| === grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a' | === grouping error ('([abc])*bcd', 'abcd', 0, 'found+"-"+g1', 'abcd-a') 'abcd-c' should be 'abcd-a' | ||||||
| === grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A' | === grouping error ('(?i)([abc])*bcd', 'ABCD', 0, 'found+"-"+g1', 'ABCD-A') 'ABCD-C' should be 'ABCD-A' | ||||||
| === Syntax error: ('a(?!b).', 'abad', 0, 'found', 'ad') |  | ||||||
| === Syntax error: ('a(?=d).', 'abad', 0, 'found', 'ad') |  | ||||||
| === Syntax error: ('a(?=c|d).', 'abad', 0, 'found', 'ad') |  | ||||||
| === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') | === Failed incorrectly ('^(.+)?B', 'AB', 0, 'g1', 'A') | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
|  * 00-06-28 fl	fixed findall (0.9.1) |  * 00-06-28 fl	fixed findall (0.9.1) | ||||||
|  * 00-06-29 fl	fixed split, added more scanner features (0.9.2) |  * 00-06-29 fl	fixed split, added more scanner features (0.9.2) | ||||||
|  * 00-06-30 fl	tuning, fast search (0.9.3) |  * 00-06-30 fl	tuning, fast search (0.9.3) | ||||||
|  |  * 00-06-30 fl	added assert (lookahead) primitives (0.9.4) | ||||||
|  * |  * | ||||||
|  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved. |  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved. | ||||||
|  * |  * | ||||||
|  | @ -30,7 +31,7 @@ | ||||||
| 
 | 
 | ||||||
| #ifndef SRE_RECURSIVE | #ifndef SRE_RECURSIVE | ||||||
| 
 | 
 | ||||||
| char copyright[] = " SRE 0.9.3 Copyright (c) 1997-2000 by Secret Labs AB "; | char copyright[] = " SRE 0.9.4 Copyright (c) 1997-2000 by Secret Labs AB "; | ||||||
| 
 | 
 | ||||||
| #include "Python.h" | #include "Python.h" | ||||||
| 
 | 
 | ||||||
|  | @ -576,11 +577,10 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) | ||||||
| 			pattern += pattern[0]; | 			pattern += pattern[0]; | ||||||
| 			break; | 			break; | ||||||
| 
 | 
 | ||||||
| #if 0 | 		case SRE_OP_ASSERT: | ||||||
| 		case SRE_OP_CALL: | 			/* assert subpattern */ | ||||||
| 			/* match subpattern, without backtracking */ |  | ||||||
| 			/* args: <skip> <pattern> */ | 			/* args: <skip> <pattern> */ | ||||||
| 			TRACE(("%8d: subpattern\n", PTR(ptr))); | 			TRACE(("%8d: assert subpattern\n", PTR(ptr))); | ||||||
| 			state->ptr = ptr; | 			state->ptr = ptr; | ||||||
| 			i = SRE_MATCH(state, pattern + 1); | 			i = SRE_MATCH(state, pattern + 1); | ||||||
|             if (i < 0) |             if (i < 0) | ||||||
|  | @ -588,9 +588,20 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern) | ||||||
|             if (!i) |             if (!i) | ||||||
| 				goto failure; | 				goto failure; | ||||||
| 			pattern += pattern[0]; | 			pattern += pattern[0]; | ||||||
| 			ptr = state->ptr; |  | ||||||
| 			break; | 			break; | ||||||
| #endif | 
 | ||||||
|  | 		case SRE_OP_ASSERT_NOT: | ||||||
|  | 			/* assert not subpattern */ | ||||||
|  | 			/* args: <skip> <pattern> */ | ||||||
|  | 			TRACE(("%8d: assert not subpattern\n", PTR(ptr))); | ||||||
|  | 			state->ptr = ptr; | ||||||
|  | 			i = SRE_MATCH(state, pattern + 1); | ||||||
|  |             if (i < 0) | ||||||
|  |                 return i; | ||||||
|  |             if (i) | ||||||
|  | 				goto failure; | ||||||
|  | 			pattern += pattern[0]; | ||||||
|  | 			break; | ||||||
| 
 | 
 | ||||||
| #if 0 | #if 0 | ||||||
| 		case SRE_OP_MAX_REPEAT_ONE: | 		case SRE_OP_MAX_REPEAT_ONE: | ||||||
|  |  | ||||||
|  | @ -1,29 +1,42 @@ | ||||||
| /* generated from sre_constants.py */ | /*
 | ||||||
|  |  * Secret Labs' Regular Expression Engine | ||||||
|  |  * | ||||||
|  |  * regular expression matching engine | ||||||
|  |  * | ||||||
|  |  * NOTE: This file is generated by sre_constants.py.  If you need | ||||||
|  |  * to change anything in here, edit sre_constants.py and run it. | ||||||
|  |  * | ||||||
|  |  * Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved. | ||||||
|  |  * | ||||||
|  |  * See the _sre.c file for information on usage and redistribution. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
| #define SRE_OP_FAILURE 0 | #define SRE_OP_FAILURE 0 | ||||||
| #define SRE_OP_SUCCESS 1 | #define SRE_OP_SUCCESS 1 | ||||||
| #define SRE_OP_ANY 2 | #define SRE_OP_ANY 2 | ||||||
| #define SRE_OP_ASSERT 3 | #define SRE_OP_ASSERT 3 | ||||||
| #define SRE_OP_AT 4 | #define SRE_OP_ASSERT_NOT 4 | ||||||
| #define SRE_OP_BRANCH 5 | #define SRE_OP_AT 5 | ||||||
| #define SRE_OP_CALL 6 | #define SRE_OP_BRANCH 6 | ||||||
| #define SRE_OP_CATEGORY 7 | #define SRE_OP_CALL 7 | ||||||
| #define SRE_OP_GROUP 8 | #define SRE_OP_CATEGORY 8 | ||||||
| #define SRE_OP_GROUP_IGNORE 9 | #define SRE_OP_GROUP 9 | ||||||
| #define SRE_OP_IN 10 | #define SRE_OP_GROUP_IGNORE 10 | ||||||
| #define SRE_OP_IN_IGNORE 11 | #define SRE_OP_IN 11 | ||||||
| #define SRE_OP_INFO 12 | #define SRE_OP_IN_IGNORE 12 | ||||||
| #define SRE_OP_JUMP 13 | #define SRE_OP_INFO 13 | ||||||
| #define SRE_OP_LITERAL 14 | #define SRE_OP_JUMP 14 | ||||||
| #define SRE_OP_LITERAL_IGNORE 15 | #define SRE_OP_LITERAL 15 | ||||||
| #define SRE_OP_MARK 16 | #define SRE_OP_LITERAL_IGNORE 16 | ||||||
| #define SRE_OP_MAX_REPEAT 17 | #define SRE_OP_MARK 17 | ||||||
| #define SRE_OP_MAX_REPEAT_ONE 18 | #define SRE_OP_MAX_REPEAT 18 | ||||||
| #define SRE_OP_MIN_REPEAT 19 | #define SRE_OP_MAX_REPEAT_ONE 19 | ||||||
| #define SRE_OP_NOT_LITERAL 20 | #define SRE_OP_MIN_REPEAT 20 | ||||||
| #define SRE_OP_NOT_LITERAL_IGNORE 21 | #define SRE_OP_NOT_LITERAL 21 | ||||||
| #define SRE_OP_NEGATE 22 | #define SRE_OP_NOT_LITERAL_IGNORE 22 | ||||||
| #define SRE_OP_RANGE 23 | #define SRE_OP_NEGATE 23 | ||||||
| #define SRE_OP_REPEAT 24 | #define SRE_OP_RANGE 24 | ||||||
|  | #define SRE_OP_REPEAT 25 | ||||||
| #define SRE_AT_BEGINNING 0 | #define SRE_AT_BEGINNING 0 | ||||||
| #define SRE_AT_BEGINNING_LINE 1 | #define SRE_AT_BEGINNING_LINE 1 | ||||||
| #define SRE_AT_BOUNDARY 2 | #define SRE_AT_BOUNDARY 2 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fredrik Lundh
						Fredrik Lundh