mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	
		
			
	
	
		
			70 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			70 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | """Iterator based sre token scanner
 | ||
|  | 
 | ||
|  | """
 | ||
|  | 
 | ||
|  | import re | ||
|  | import sre_parse | ||
|  | import sre_compile | ||
|  | import sre_constants | ||
|  | 
 | ||
|  | from re import VERBOSE, MULTILINE, DOTALL | ||
|  | from sre_constants import BRANCH, SUBPATTERN | ||
|  | 
 | ||
|  | __all__ = ['Scanner', 'pattern'] | ||
|  | 
 | ||
|  | FLAGS = (VERBOSE | MULTILINE | DOTALL) | ||
|  | 
 | ||
class Scanner(object):
    """Iterator based token scanner driven by a single compound regex.

    Each lexicon entry contributes one numbered alternative to a combined
    pattern; when an alternative matches, the entry that supplied it is
    called back with the match object.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Compile *lexicon* into one alternation pattern.

        lexicon -- iterable of callables, each carrying a ``pattern``
                   attribute with its regular expression source (the
                   ``pattern`` decorator in this module sets one).
        flags   -- regular expression flags used to parse every phrase.

        Any error raised while parsing a phrase propagates unchanged.
        """
        # Slot 0 is a placeholder so that self.actions[n] is the action
        # registered for group number n (group numbers start at 1).
        self.actions = [None]
        # Combine phrases into a compound pattern
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            # Wrap each phrase in its own numbered group; the group number
            # is what iterscan() later reads back through m.lastindex.
            subpattern = sre_parse.SubPattern(s,
                [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
            p.append(subpattern)
            self.actions.append(token)

        s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)

    def iterscan(self, string, idx=0, context=None):
        """Yield (rval, end_idx) for each token matched in *string*.

        Scanning starts at *idx*.  For every match, the action registered
        for the matching group is called as ``action(m, context)`` and must
        return a ``(rval, next_pos)`` pair.  ``rval`` is yielded together
        with the position at which scanning resumes.  If ``next_pos`` is
        neither None nor the end of the match, scanning restarts from
        ``next_pos`` instead.

        Iteration stops when the scanner produces no match, or when a
        match ends exactly where the previous one ended (no progress).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # A match that does not advance would repeat forever.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend
|  | 
 | ||
|  | 
 | ||
def pattern(pattern, flags=FLAGS):
    """Return a decorator that tags a function with a token regex.

    The decorated function is handed back unchanged apart from two new
    attributes: ``pattern`` (the regular expression source passed here)
    and ``regex`` (that source compiled with *flags*).
    """
    def tag(func):
        # Keep this order: the source string is attached before compiling.
        func.pattern = pattern
        func.regex = re.compile(pattern, flags)
        return func
    return tag