mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	 b0871cac11
			
		
	
	
		b0871cac11
		
	
	
	
	
		
			
			svn+ssh://pythondev@svn.python.org/python/branches/py3k
................
  r85510 | benjamin.peterson | 2010-10-14 18:00:04 -0500 (Thu, 14 Oct 2010) | 61 lines
  Merged revisions 83852-83853,83857,84042,84216,84274-84276,84375,85388,85478,85506-85508 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
  ........
    r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line
    wrap with parens
  ........
    r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line
    use parens
  ........
    r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line
    things which use touch_import should be pre order
  ........
    r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines
    This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers.
  ........
    r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line
    allow star_expr in testlist_gexp
  ........
    r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line
    wrap long line
  ........
    r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line
    cleanup
  ........
    r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line
    when there's a None value and a traceback, don't call type with it #9661
  ........
    r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines
    Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code.
  ........
    r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line
    fix urllib fixer with multiple as imports on a line #10069
  ........
    r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line
    stop abusing docstrings
  ........
    r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line
    kill sibling import
  ........
    r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line
    remove trailing whitespace
  ........
    r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line
    typo
  ........
................
		
	
			
		
			
				
	
	
		
			204 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			204 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright 2006 Google, Inc. All Rights Reserved.
 | |
| # Licensed to PSF under a Contributor Agreement.
 | |
| 
 | |
| """Pattern compiler.
 | |
| 
 | |
| The grammar is taken from PatternGrammar.txt.
 | |
| 
 | |
| The compiler compiles a pattern to a pytree.*Pattern instance.
 | |
| """
 | |
| 
 | |
| __author__ = "Guido van Rossum <guido@python.org>"
 | |
| 
 | |
| # Python imports
 | |
| import os
 | |
| 
 | |
| # Fairly local imports
 | |
| from .pgen2 import driver, literals, token, tokenize, parse, grammar
 | |
| 
 | |
| # Really local imports
 | |
| from . import pytree
 | |
| from . import pygram
 | |
| 
 | |
# Path of the pattern grammar file, located in the same directory as this
# module.
_PATTERN_GRAMMAR_FILE = os.path.join(
    os.path.dirname(__file__), "PatternGrammar.txt"
)
 | |
| 
 | |
| 
 | |
class PatternSyntaxError(Exception):
    """Raised when a pattern string cannot be compiled."""
 | |
| 
 | |
| 
 | |
def tokenize_wrapper(input):
    """Tokenizes a string suppressing significant whitespace.

    Yields the token tuples produced by the tokenizer, except for
    NEWLINE, INDENT and DEDENT tokens, which are dropped.
    """
    ignored = {token.NEWLINE, token.INDENT, token.DEDENT}
    readline = driver.generate_lines(input).__next__
    for tok in tokenize.generate_tokens(readline):
        # The token type is the first field of each token tuple.
        if tok[0] in ignored:
            continue
        yield tok
 | |
| 
 | |
| 
 | |
class PatternCompiler(object):
    """Compiles pattern strings into pytree.*Pattern instances."""

    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
        """Initializer.

        Takes an optional alternative filename for the pattern grammar.
        """
        self.grammar = driver.load_grammar(grammar_file)
        self.syms = pygram.Symbols(self.grammar)
        self.pygrammar = pygram.python_grammar
        self.pysyms = pygram.python_symbols
        self.driver = driver.Driver(self.grammar, convert=pattern_convert)

    def compile_pattern(self, input, debug=False, with_tree=False):
        """Compiles a pattern string to a nested pytree.*Pattern object.

        If with_tree is true, a (pattern, parse tree) pair is returned
        instead of just the pattern.

        Raises PatternSyntaxError if the pattern string cannot be parsed.
        """
        tokens = tokenize_wrapper(input)
        try:
            root = self.driver.parse_tokens(tokens, debug=debug)
        except parse.ParseError as e:
            # Chain explicitly so the original parse error stays attached.
            raise PatternSyntaxError(str(e)) from e
        if with_tree:
            return self.compile_node(root), root
        else:
            return self.compile_node(root)

    def compile_node(self, node):
        """Compiles a node, recursively.

        This is one big switch on the node type.
        """
        # XXX Optimize certain Wildcard-containing-Wildcard patterns
        # that can be merged
        if node.type == self.syms.Matcher:
            node = node.children[0]  # Avoid unneeded recursion

        if node.type == self.syms.Alternatives:
            # Skip the odd children since they are just '|' tokens
            alts = [self.compile_node(ch) for ch in node.children[::2]]
            if len(alts) == 1:
                return alts[0]
            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.Alternative:
            units = [self.compile_node(ch) for ch in node.children]
            if len(units) == 1:
                return units[0]
            p = pytree.WildcardPattern([units], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.NegatedUnit:
            # The first child is skipped; the rest is the negated pattern.
            pattern = self.compile_basic(node.children[1:])
            p = pytree.NegatedPattern(pattern)
            return p.optimize()

        assert node.type == self.syms.Unit

        # Strip an optional leading "name =" and an optional trailing
        # Repeater from the unit's children.
        name = None
        nodes = node.children
        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
            name = nodes[0].value
            nodes = nodes[2:]
        repeat = None
        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
            repeat = nodes[-1]
            nodes = nodes[:-1]

        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
        pattern = self.compile_basic(nodes, repeat)

        if repeat is not None:
            assert repeat.type == self.syms.Repeater
            children = repeat.children
            child = children[0]
            if child.type == token.STAR:
                lo = 0
                hi = pytree.HUGE
            elif child.type == token.PLUS:
                lo = 1
                hi = pytree.HUGE
            elif child.type == token.LBRACE:
                # Either {n} (3 children) or {n,m} (5 children).
                assert children[-1].type == token.RBRACE
                assert len(children) in (3, 5)
                lo = hi = self.get_int(children[1])
                if len(children) == 5:
                    hi = self.get_int(children[3])
            else:
                assert False
            # A repeat count of exactly one needs no wildcard wrapper.
            if lo != 1 or hi != 1:
                pattern = pattern.optimize()
                pattern = pytree.WildcardPattern([[pattern]], min=lo, max=hi)

        if name is not None:
            pattern.name = name
        return pattern.optimize()

    def compile_basic(self, nodes, repeat=None):
        """Compiles STRING | NAME [Details] | (...) | [...].

        nodes is the non-empty list of children making up the basic unit;
        repeat is the unit's Repeater node, if any.

        Raises PatternSyntaxError for an unknown token or symbol name, or
        for a token name followed by details.
        """
        assert len(nodes) >= 1
        node = nodes[0]
        if node.type == token.STRING:
            value = str(literals.evalString(node.value))
            return pytree.LeafPattern(_type_of_literal(value), value)
        elif node.type == token.NAME:
            value = node.value
            if value.isupper():
                # Upper-case names refer to token types.
                if value not in TOKEN_MAP:
                    raise PatternSyntaxError("Invalid token: %r" % value)
                if nodes[1:]:
                    raise PatternSyntaxError("Can't have details for token")
                return pytree.LeafPattern(TOKEN_MAP[value])
            else:
                if value == "any":
                    node_type = None
                elif value.startswith("_"):
                    # Names with a leading underscore are not looked up on
                    # the symbols object.  Previously this case left the
                    # node type unassigned and failed with
                    # UnboundLocalError; report it as an invalid symbol.
                    raise PatternSyntaxError("Invalid symbol: %r" % value)
                else:
                    node_type = getattr(self.pysyms, value, None)
                    if node_type is None:
                        raise PatternSyntaxError("Invalid symbol: %r" % value)
                if nodes[1:]:  # Details present
                    content = [self.compile_node(nodes[1].children[1])]
                else:
                    content = None
                return pytree.NodePattern(node_type, content)
        elif node.value == "(":
            return self.compile_node(nodes[1])
        elif node.value == "[":
            # An optional group cannot also carry a repeater.
            assert repeat is None
            subpattern = self.compile_node(nodes[1])
            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
        assert False, node

    def get_int(self, node):
        """Returns the integer value of a NUMBER token node."""
        assert node.type == token.NUMBER
        return int(node.value)
 | |
| 
 | |
| 
 | |
# Map named tokens to the type value for a LeafPattern.
# "TOKEN" maps to None rather than to a specific token type.
TOKEN_MAP = {
    "NAME": token.NAME,
    "STRING": token.STRING,
    "NUMBER": token.NUMBER,
    "TOKEN": None,
}
 | |
| 
 | |
| 
 | |
| def _type_of_literal(value):
 | |
|     if value[0].isalpha():
 | |
|         return token.NAME
 | |
|     elif value in grammar.opmap:
 | |
|         return grammar.opmap[value]
 | |
|     else:
 | |
|         return None
 | |
| 
 | |
| 
 | |
def pattern_convert(grammar, raw_node_info):
    """Converts raw node information to a Node or Leaf instance.

    raw_node_info is a (type, value, context, children) tuple.  A Leaf is
    built only when there are no children and the type is not one of the
    grammar's symbol numbers; otherwise a Node is built.
    """
    node_type, value, context, children = raw_node_info
    is_leaf = not children and node_type not in grammar.number2symbol
    if is_leaf:
        return pytree.Leaf(node_type, value, context=context)
    return pytree.Node(node_type, children, context=context)
 | |
| 
 | |
| 
 | |
def compile_pattern(pattern):
    """Compiles a pattern string using a freshly built PatternCompiler."""
    compiler = PatternCompiler()
    return compiler.compile_pattern(pattern)
 |