mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			193 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#
 | 
						|
# Secret Labs' Regular Expression Engine
 | 
						|
# $Id$
 | 
						|
#
 | 
						|
# convert template to internal format
 | 
						|
#
 | 
						|
# Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
 | 
						|
#
 | 
						|
# This code can only be used for 1.6 alpha testing.  All other use
 | 
						|
# require explicit permission from Secret Labs AB.
 | 
						|
#
 | 
						|
# Portions of this engine have been developed in cooperation with
 | 
						|
# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
 | 
						|
# other compatibility work.
 | 
						|
#
 | 
						|
 | 
						|
import array, string, sys
 | 
						|
 | 
						|
import _sre
 | 
						|
 | 
						|
from sre_constants import *
 | 
						|
 | 
						|
# Find the array type code whose item size equals the engine's code
# unit size; Code.todata() packs the compiled program with it.
# WORDSIZE stays bound to the first match in "BHil".  If no candidate
# fits, the for/else clause raises RuntimeError at import time.
for WORDSIZE in "BHil":
    # itemsize is the byte length of one element, which is exactly what
    # len(array.array(WORDSIZE, [0]).tostring()) used to measure
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    raise RuntimeError("cannot find a useable array type")
 | 
						|
 | 
						|
# FIXME: <fl> should move some optimizations from the parser to here!
 | 
						|
 | 
						|
class Code:
    """Growable buffer of engine code words.

    Wraps a plain list (self.data) with the few sequence operations the
    compiler uses: append(), len(), and reading or patching a slot by
    index.
    """

    def __init__(self):
        self.data = []

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]

    def __setitem__(self, index, code):
        self.data[index] = code

    def append(self, code):
        self.data.append(code)

    def todata(self):
        """Return the buffer packed through array.array(WORDSIZE, ...).

        If a value does not fit the array item type (OverflowError), the
        raw list is printed as a debugging aid and the exception is
        re-raised unchanged.
        """
        try:
            return array.array(WORDSIZE, self.data).tostring()
        except OverflowError:
            # single-argument form: prints the same text whether print
            # is a statement or a function
            print(self.data)
            raise
 | 
						|
 | 
						|
def _category_code(av, flags):
    """Return the engine code for character category av under flags."""
    if flags & SRE_FLAG_LOCALE:
        # Translate the category first, then encode it: the same order
        # as ATCODES[AT_MULTILINE[av]] and OPCODES[OP_IGNORE[op]].
        # NOTE(review): assumes CH_LOCALE is keyed by category constants
        # like those two tables -- confirm against sre_constants.
        return CHCODES[CH_LOCALE[av]]
    return CHCODES[av]

def _compile(code, pattern, flags, level=0):
    """Append the engine code for a parsed pattern to code.

    code    -- output buffer; must support append(), len() and item
               assignment (skip offsets are patched after the fact)
    pattern -- iterable of (op, av) pairs
    flags   -- SRE_FLAG_* bit mask
    level   -- nesting depth; only handed on to recursive calls

    Raises SyntaxError when a repeated item reports a minimum width of
    0, and ValueError for an operator this function does not handle.
    """
    append = code.append
    for op, av in pattern:
        if op is ANY:
            if flags & SRE_FLAG_DOTALL:
                append(OPCODES[op]) # any character at all!
            else:
                # without DOTALL, "any" is emitted as a category test
                append(OPCODES[CATEGORY])
                append(CHCODES[CATEGORY_NOT_LINEBREAK])
        elif op in (SUCCESS, FAILURE):
            append(OPCODES[op])
        elif op is AT:
            append(OPCODES[op])
            if flags & SRE_FLAG_MULTILINE:
                append(ATCODES[AT_MULTILINE[av]])
            else:
                append(ATCODES[av])
        elif op is BRANCH:
            append(OPCODES[op])
            jumps = []
            for alternative in av[1]:
                # reserve a slot, later patched with the distance to
                # the code following this alternative
                skip = len(code); append(0)
                _compile(code, alternative, flags, level)
                append(OPCODES[JUMP])
                # the jump operand is only known once all alternatives
                # have been emitted; remember where to patch it
                jumps.append(len(code)); append(0)
                code[skip] = len(code) - skip
            append(0) # end of branch
            for jump in jumps:
                code[jump] = len(code) - jump
        elif op is CALL:
            append(OPCODES[op])
            skip = len(code); append(0)
            _compile(code, av, flags, level+1)
            append(OPCODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY: # not used by current parser
            append(OPCODES[op])
            append(_category_code(av, flags))
        elif op is GROUP:
            if flags & SRE_FLAG_IGNORECASE:
                append(OPCODES[OP_IGNORE[op]])
            else:
                append(OPCODES[op])
            append(av-1)
        elif op is IN:
            if flags & SRE_FLAG_IGNORECASE:
                append(OPCODES[OP_IGNORE[op]])
                # set members are lowercased before being encoded
                def fixup(literal):
                    return ord(literal.lower())
            else:
                append(OPCODES[op])
                fixup = ord
            skip = len(code); append(0)
            for set_op, set_av in av:
                # the opcode is emitted before the operator is
                # validated, exactly as before
                append(OPCODES[set_op])
                if set_op is NEGATE:
                    pass
                elif set_op is LITERAL:
                    append(fixup(set_av))
                elif set_op is RANGE:
                    append(fixup(set_av[0]))
                    append(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    append(_category_code(set_av, flags))
                else:
                    raise ValueError("unsupported set operator")
            append(OPCODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            if flags & SRE_FLAG_IGNORECASE:
                append(OPCODES[OP_IGNORE[op]])
                append(ord(av.lower()))
            else:
                append(OPCODES[op])
                append(ord(av))
        elif op is MARK:
            append(OPCODES[op])
            append(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            lo, hi = av[2].getwidth()
            if lo == 0:
                raise SyntaxError("cannot repeat zero-width items")
            # choose the opening opcode and the matching terminator;
            # everything in between is laid out identically
            if lo == hi == 1 and op is MAX_REPEAT:
                opener, closer = MAX_REPEAT_ONE, SUCCESS
            elif op is MIN_REPEAT:
                opener, closer = op, MIN_UNTIL
            else:
                opener, closer = op, MAX_UNTIL
            append(OPCODES[opener])
            skip = len(code); append(0)
            append(av[0])
            append(av[1])
            _compile(code, av[2], flags, level+1)
            append(OPCODES[closer])
            code[skip] = len(code) - skip
        elif op is SUBPATTERN:
            group = av[0]
            if group:
                # even mark index ahead of the group body ...
                append(OPCODES[MARK])
                append((group-1)*2)
            _compile(code, av[1], flags, level+1)
            if group:
                # ... and the following odd index after it
                append(OPCODES[MARK])
                append((group-1)*2+1)
        else:
            raise ValueError("unsupported operand type", op)
 | 
						|
 | 
						|
def compile(p, flags=0):
    """Convert a pattern to the engine's internal format.

    p     -- pattern source (plain or unicode string), or an already
             parsed pattern object exposing .data and .pattern
    flags -- extra SRE_FLAG_* bits, OR-ed with p.pattern.flags

    Returns whatever _sre.compile builds from the packed code.
    """
    # the source text is only known when we were handed a string
    source = None
    if type(p) in (type(""), type(u"")):
        import sre_parse
        source = p
        p = sre_parse.parse(source)
    state = p.pattern
    flags = state.flags | flags
    program = Code()
    _compile(program, p.data, flags)
    program.append(OPCODES[SUCCESS])
    data = program.todata()
    if 0: # debugging
        print("")
        print("-" * 68)
        import sre_disasm
        sre_disasm.disasm(data)
        print("-" * 68)
    return _sre.compile(
        source, flags,
        data,
        state.groups-1, state.groupdict
        )
 |