mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			69 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			69 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Iterator based sre token scanner
 | |
| 
 | |
| """
 | |
| 
 | |
| import re
 | |
| import sre_parse
 | |
| import sre_compile
 | |
| import sre_constants
 | |
| 
 | |
| from re import VERBOSE, MULTILINE, DOTALL
 | |
| from sre_constants import BRANCH, SUBPATTERN
 | |
| 
 | |
| __all__ = ['Scanner', 'pattern']
 | |
| 
 | |
# Default regular-expression flags shared by Scanner and pattern().
FLAGS = (VERBOSE | MULTILINE | DOTALL)


class Scanner(object):
    """Token scanner that tries a set of patterns as one compound regex.

    ``lexicon`` is an iterable of callables, each carrying a ``pattern``
    attribute holding its regular expression source.  Every phrase is
    wrapped in its own numbered group and all groups are joined into a
    single alternation, so ``match.lastindex`` identifies which lexicon
    entry matched.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Compile the phrases of ``lexicon`` into one compound pattern.

        ``flags`` is used both for parsing each phrase and as the flags of
        the combined pattern.  An ``sre_constants.error`` raised while
        parsing a phrase propagates to the caller unchanged.
        """
        # Slot 0 is a placeholder so that indices into ``actions`` line up
        # with the 1-based group numbers reported by ``match.lastindex``.
        self.actions = [None]
        # Combine phrases into a compound pattern
        s = sre_parse.Pattern()
        s.flags = flags
        p = []
        for idx, token in enumerate(lexicon):
            phrase = token.pattern
            # Group number idx + 1 ties this sub-pattern to actions[idx + 1].
            subpattern = sre_parse.SubPattern(s,
                [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
            p.append(subpattern)
            self.actions.append(token)

        s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        self.scanner = sre_compile.compile(p)

    def iterscan(self, string, idx=0, context=None):
        """Yield ``(rval, end_idx)`` for each token matched in ``string``.

        Scanning starts at ``idx`` and repeatedly matches the compound
        pattern at the current position.  The action selected by
        ``match.lastindex`` is called as ``action(match, context)`` and must
        return ``(rval, next_pos)``; ``rval`` is yielded together with the
        position at which scanning resumes.  Matches whose action is
        ``None`` are consumed without yielding anything.

        The generator finishes when nothing matches at the current position
        or when a match ends where the previous one ended (no progress).
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # The match did not advance; stop rather than loop forever.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner: the action consumed input
                    # itself, so restart matching from where it stopped.
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend
 | |
| 
 | |
| 
 | |
def pattern(pattern, flags=FLAGS):
    """Return a decorator that tags a function with its token pattern.

    The decorated function is returned unchanged apart from two new
    attributes: ``pattern`` (the regular expression source passed here) and
    ``regex`` (that source compiled with ``flags``).
    """
    def attach(fn):
        # Record the raw source first, then the compiled form.
        setattr(fn, 'pattern', pattern)
        setattr(fn, 'regex', re.compile(pattern, flags))
        return fn

    return attach
 | 
