mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	
		
			
	
	
		
			70 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			70 lines
		
	
	
	
		
			2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								"""Iterator based sre token scanner
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import re
							 | 
						||
| 
								 | 
							
								import sre_parse
							 | 
						||
| 
								 | 
							
								import sre_compile
							 | 
						||
| 
								 | 
							
								import sre_constants
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from re import VERBOSE, MULTILINE, DOTALL
							 | 
						||
| 
								 | 
							
								from sre_constants import BRANCH, SUBPATTERN
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								__all__ = ['Scanner', 'pattern']
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Default flags used for every lexicon phrase and for the compound pattern.
								FLAGS = (VERBOSE | MULTILINE | DOTALL)


								class Scanner(object):
								    """Iterator-based token scanner built on the SRE engine.

								    Each lexicon entry is a callable that carries a ``pattern`` attribute
								    holding regular-expression source.  All phrases are merged into one
								    compound pattern with a numbered group per entry, so a single match
								    identifies (through ``lastindex``) which entry's callable to invoke.
								    """

								    def __init__(self, lexicon, flags=FLAGS):
								        """Compile *lexicon* into a single alternation pattern.

								        lexicon -- iterable of callables, each with a ``pattern`` attribute;
								                   earlier entries are tried first.
								        flags   -- regular-expression flags used to parse every phrase.

								        Parse errors raised for an invalid phrase propagate to the caller.

								        Groups are allocated with ``opengroup()``/``closegroup()`` in the
								        same way the standard library's ``re.Scanner`` does; this relies on
								        the current SRE parser internals (``State`` and the four-element
								        ``SUBPATTERN`` argument).
								        """
								        # The SRE internals work on plain ints rather than RegexFlag members.
								        flags = int(flags)
								        # Index 0 is a placeholder so that actions[m.lastindex] lines up
								        # with the 1-based group ids handed out below.
								        self.actions = [None]
								        # Combine phrases into a compound pattern
								        s = sre_parse.State()
								        s.flags = flags
								        p = []
								        for token in lexicon:
								            gid = s.opengroup()
								            subpattern = sre_parse.SubPattern(s, [
								                (SUBPATTERN,
								                 (gid, 0, 0, sre_parse.parse(token.pattern, flags))),
								            ])
								            # Registers the group's width so the state reports the right
								            # number of groups to the compiler.
								            s.closegroup(gid, subpattern)
								            p.append(subpattern)
								            self.actions.append(token)

								        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
								        self.scanner = sre_compile.compile(p)

								    def iterscan(self, string, idx=0, context=None):
								        """Yield ``(rval, end_idx)`` for each token matched in *string*.

								        string  -- text to scan.
								        idx     -- position at which scanning starts.
								        context -- opaque object passed through to every action.

								        For each match the action registered for the matching phrase is
								        called as ``action(match, context)`` and must return a pair
								        ``(rval, next_pos)``.  When ``next_pos`` is not None and differs
								        from the end of the match, scanning resumes at ``next_pos`` and
								        that position is reported as ``end_idx``.

								        Iteration ends when no phrase matches at the current position or
								        when a match ends where the previous one ended (no progress).
								        """
								        match = self.scanner.scanner(string, idx).match
								        actions = self.actions
								        lastend = idx
								        while True:
								            m = match()
								            if m is None:
								                break
								            matchend = m.end()
								            if lastend == matchend:
								                # No progress was made; stop instead of looping forever.
								                break
								            action = actions[m.lastindex]
								            if action is not None:
								                rval, next_pos = action(m, context)
								                if next_pos is not None and next_pos != matchend:
								                    # "fast forward" the scanner
								                    matchend = next_pos
								                    match = self.scanner.scanner(string, matchend).match
								                yield rval, matchend
								            lastend = matchend
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def pattern(pattern, flags=FLAGS):
								    """Return a decorator that tags a function with a regular expression.

								    The decorated function is returned unchanged except for two new
								    attributes: ``pattern`` (the source string given here) and ``regex``
								    (that source compiled with *flags*).
								    """
								    def attach(fn):
								        # The source string is stored first, then its compiled form.
								        fn.pattern = pattern
								        compiled = re.compile(pattern, flags)
								        fn.regex = compiled
								        return fn

								    return attach
							 |