mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r73771 | benjamin.peterson | 2009-07-02 10:56:55 -0500 (Thu, 02 Jul 2009) | 1 line force the imports fixer to be run after the import one #6400 ........ r73811 | benjamin.peterson | 2009-07-03 09:03:14 -0500 (Fri, 03 Jul 2009) | 1 line check for sep, not pathsep when looking for a subpackage #6408 ........ r73840 | benjamin.peterson | 2009-07-04 09:52:28 -0500 (Sat, 04 Jul 2009) | 1 line don't print diffs by default; it's annoying ........ r73842 | benjamin.peterson | 2009-07-04 09:58:46 -0500 (Sat, 04 Jul 2009) | 1 line complain when not showing diffs or writing ........ r73848 | alexandre.vassalotti | 2009-07-04 23:38:19 -0500 (Sat, 04 Jul 2009) | 2 lines Fix test_refactor_stdin to handle print_output() method with 4 arguments. ........ r73849 | alexandre.vassalotti | 2009-07-04 23:43:18 -0500 (Sat, 04 Jul 2009) | 5 lines Issue 2370: Add fixer for the removal of operator.isCallable() and operator.sequenceIncludes(). Patch contributed by Jeff Balogh (and updated by me). ........ r73861 | benjamin.peterson | 2009-07-05 09:15:53 -0500 (Sun, 05 Jul 2009) | 1 line cleanup and use unicode where appropiate ........ r73957 | benjamin.peterson | 2009-07-11 15:49:56 -0500 (Sat, 11 Jul 2009) | 1 line fix calls to str() with unicode() ........ r73958 | benjamin.peterson | 2009-07-11 15:51:51 -0500 (Sat, 11 Jul 2009) | 1 line more str() -> unicode() ........ r73959 | benjamin.peterson | 2009-07-11 16:40:08 -0500 (Sat, 11 Jul 2009) | 1 line add tests for refactor_dir() ........ r73960 | benjamin.peterson | 2009-07-11 16:44:32 -0500 (Sat, 11 Jul 2009) | 1 line don't parse files just because they end with 'py' (no dot) ........ r73964 | benjamin.peterson | 2009-07-11 17:30:15 -0500 (Sat, 11 Jul 2009) | 1 line simplify ........ r73965 | benjamin.peterson | 2009-07-11 17:31:30 -0500 (Sat, 11 Jul 2009) | 1 line remove usage of get_prefix() ........ 
r73966 | benjamin.peterson | 2009-07-11 17:33:35 -0500 (Sat, 11 Jul 2009) | 1 line revert unintended change in 73965 ........ r73967 | benjamin.peterson | 2009-07-11 17:34:44 -0500 (Sat, 11 Jul 2009) | 1 line avoid expensive checks and assume the node did change ........ r73968 | benjamin.peterson | 2009-07-11 20:46:46 -0500 (Sat, 11 Jul 2009) | 1 line use a regular dict for the heads to avoid adding lists in the loop ........ r73969 | benjamin.peterson | 2009-07-11 20:50:43 -0500 (Sat, 11 Jul 2009) | 1 line prefix headnode functions with '_' ........ r73972 | benjamin.peterson | 2009-07-11 21:25:45 -0500 (Sat, 11 Jul 2009) | 1 line try to make the head node dict as sparse as possible ........ r73973 | benjamin.peterson | 2009-07-11 21:59:49 -0500 (Sat, 11 Jul 2009) | 1 line a better idea; add an option to *not* print diffs ........ r73974 | benjamin.peterson | 2009-07-11 22:00:29 -0500 (Sat, 11 Jul 2009) | 1 line add space ........ r73977 | benjamin.peterson | 2009-07-12 10:16:07 -0500 (Sun, 12 Jul 2009) | 1 line update get_headnode_dict tests for recent changes ........ r73981 | benjamin.peterson | 2009-07-12 12:06:39 -0500 (Sun, 12 Jul 2009) | 4 lines detect when "from __future__ import print_function" is given Deprecate the 'print_function' option and the -p flag ........ r73984 | benjamin.peterson | 2009-07-12 16:16:37 -0500 (Sun, 12 Jul 2009) | 1 line add tests for Call; thanks Joe Amenta ........ r74065 | benjamin.peterson | 2009-07-17 12:52:49 -0500 (Fri, 17 Jul 2009) | 1 line pathname2url and url2pathname are in urllib.request not urllib.parse #6496 ........ r74113 | benjamin.peterson | 2009-07-20 08:56:57 -0500 (Mon, 20 Jul 2009) | 1 line fix deprecation warnings in tests ........
		
			
				
	
	
		
			201 lines
		
	
	
	
		
			6.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			201 lines
		
	
	
	
		
			6.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# Copyright 2006 Google, Inc. All Rights Reserved.
 | 
						|
# Licensed to PSF under a Contributor Agreement.
 | 
						|
 | 
						|
"""Pattern compiler.
 | 
						|
 | 
						|
The grammar is taken from PatternGrammar.txt.
 | 
						|
 | 
						|
The compiler compiles a pattern to a pytree.*Pattern instance.
 | 
						|
"""
 | 
						|
 | 
						|
__author__ = "Guido van Rossum <guido@python.org>"
 | 
						|
 | 
						|
# Python imports
 | 
						|
import os
 | 
						|
 | 
						|
# Fairly local imports
 | 
						|
from .pgen2 import driver, literals, token, tokenize, parse, grammar
 | 
						|
 | 
						|
# Really local imports
 | 
						|
from . import pytree
 | 
						|
from . import pygram
 | 
						|
 | 
						|
# Path of the pattern grammar file, located in this module's directory
_PATTERN_GRAMMAR_FILE = os.path.join(
    os.path.dirname(__file__), "PatternGrammar.txt")
 | 
						|
 | 
						|
 | 
						|
class PatternSyntaxError(Exception):
    """Raised when a pattern string cannot be compiled."""
 | 
						|
 | 
						|
 | 
						|
def tokenize_wrapper(input):
    """Yields the tokens of a string, leaving out layout-only tokens.

    NEWLINE, INDENT and DEDENT tokens are dropped; every other token
    tuple produced by the tokenizer is passed through unchanged.
    """
    ignored = set((token.NEWLINE, token.INDENT, token.DEDENT))
    readline = driver.generate_lines(input).next
    for tok in tokenize.generate_tokens(readline):
        # The token type is the first field of each token tuple.
        if tok[0] in ignored:
            continue
        yield tok
 | 
						|
 | 
						|
 | 
						|
class PatternCompiler(object):
    """Compiles pattern strings into pytree.*Pattern objects.

    The pattern text is parsed with the pattern grammar and the resulting
    parse tree is then translated, node by node, into pattern objects.
    """

    def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
        """Initializer.

        Takes an optional alternative filename for the pattern grammar.
        """
        self.grammar = driver.load_grammar(grammar_file)
        self.syms = pygram.Symbols(self.grammar)
        self.pygrammar = pygram.python_grammar
        self.pysyms = pygram.python_symbols
        self.driver = driver.Driver(self.grammar, convert=pattern_convert)

    def compile_pattern(self, input, debug=False):
        """Compiles a pattern string to a nested pytree.*Pattern object.

        Raises PatternSyntaxError if the parser rejects the pattern text;
        the parser's own ParseError is converted so callers only need to
        handle one exception type.
        """
        tokens = tokenize_wrapper(input)
        try:
            root = self.driver.parse_tokens(tokens, debug=debug)
        except parse.ParseError as e:
            raise PatternSyntaxError(str(e))
        return self.compile_node(root)

    def compile_node(self, node):
        """Compiles a node, recursively.

        This is one big switch on the node type: Matcher, Alternatives,
        Alternative, NegatedUnit and finally Unit.  Returns the optimized
        pattern object for the node.
        """
        # XXX Optimize certain Wildcard-containing-Wildcard patterns
        # that can be merged
        if node.type == self.syms.Matcher:
            node = node.children[0] # Avoid unneeded recursion

        if node.type == self.syms.Alternatives:
            # Skip the odd children since they are just '|' tokens
            alts = [self.compile_node(ch) for ch in node.children[::2]]
            if len(alts) == 1:
                return alts[0]
            p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.Alternative:
            units = [self.compile_node(ch) for ch in node.children]
            if len(units) == 1:
                return units[0]
            p = pytree.WildcardPattern([units], min=1, max=1)
            return p.optimize()

        if node.type == self.syms.NegatedUnit:
            # The first child is skipped; the rest is the negated pattern.
            pattern = self.compile_basic(node.children[1:])
            p = pytree.NegatedPattern(pattern)
            return p.optimize()

        assert node.type == self.syms.Unit

        # Peel off an optional leading "name =" and an optional trailing
        # Repeater so that only the basic pattern remains.
        name = None
        nodes = node.children
        if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
            name = nodes[0].value
            nodes = nodes[2:]
        repeat = None
        if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
            repeat = nodes[-1]
            nodes = nodes[:-1]

        # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
        pattern = self.compile_basic(nodes, repeat)

        if repeat is not None:
            lo, hi = self._repeat_bounds(repeat)
            # Exactly-once repetition needs no wildcard wrapper.
            if lo != 1 or hi != 1:
                pattern = pattern.optimize()
                pattern = pytree.WildcardPattern([[pattern]], min=lo, max=hi)

        if name is not None:
            pattern.name = name
        return pattern.optimize()

    def _repeat_bounds(self, repeat):
        """Returns the (min, max) repetition counts for a Repeater node.

        '*' gives (0, HUGE), '+' gives (1, HUGE), '{n}' gives (n, n) and
        '{n,m}' gives (n, m).
        """
        assert repeat.type == self.syms.Repeater
        children = repeat.children
        first = children[0]
        if first.type == token.STAR:
            return 0, pytree.HUGE
        if first.type == token.PLUS:
            return 1, pytree.HUGE
        if first.type == token.LBRACE:
            assert children[-1].type == token.RBRACE
            assert len(children) in (3, 5)
            lo = hi = self.get_int(children[1])
            if len(children) == 5:
                hi = self.get_int(children[3])
            return lo, hi
        # Any other leading token is an internal error, not a user error.
        raise AssertionError(first)

    def compile_basic(self, nodes, repeat=None):
        """Compiles the core of a Unit: STRING | NAME [Details] | (...) | [...]

        nodes is the non-empty list of parse tree nodes making up the basic
        pattern; repeat is the Repeater node that followed it, if any.

        Raises PatternSyntaxError for an unknown upper-case token name, for
        details attached to a token, and for an unknown symbol name.
        """
        assert len(nodes) >= 1
        node = nodes[0]
        if node.type == token.STRING:
            value = unicode(literals.evalString(node.value))
            return pytree.LeafPattern(_type_of_literal(value), value)
        elif node.type == token.NAME:
            value = node.value
            if value.isupper():
                # Upper-case names denote token types.
                if value not in TOKEN_MAP:
                    raise PatternSyntaxError("Invalid token: %r" % value)
                if nodes[1:]:
                    raise PatternSyntaxError("Can't have details for token")
                return pytree.LeafPattern(TOKEN_MAP[value])
            else:
                if value == "any":
                    node_type = None
                elif value.startswith("_"):
                    # Names starting with an underscore are never looked up
                    # on the symbol table.  Previously this case left the
                    # type unassigned and surfaced as UnboundLocalError.
                    raise PatternSyntaxError("Invalid symbol: %r" % value)
                else:
                    node_type = getattr(self.pysyms, value, None)
                    if node_type is None:
                        raise PatternSyntaxError("Invalid symbol: %r" % value)
                if nodes[1:]: # Details present
                    content = [self.compile_node(nodes[1].children[1])]
                else:
                    content = None
                return pytree.NodePattern(node_type, content)
        elif node.value == "(":
            return self.compile_node(nodes[1])
        elif node.value == "[":
            # An optional group cannot also carry a repeater.
            assert repeat is None
            subpattern = self.compile_node(nodes[1])
            return pytree.WildcardPattern([[subpattern]], min=0, max=1)
        assert False, node

    def get_int(self, node):
        """Returns the integer value of a NUMBER leaf."""
        assert node.type == token.NUMBER
        return int(node.value)
 | 
						|
 | 
						|
 | 
						|
# Token names usable in patterns, mapped to the LeafPattern type value
TOKEN_MAP = {
    "NAME": token.NAME,
    "STRING": token.STRING,
    "NUMBER": token.NUMBER,
    "TOKEN": None,
}
 | 
						|
 | 
						|
 | 
						|
def _type_of_literal(value):
    """Returns the token type a quoted pattern literal should match.

    A literal starting with a letter is a NAME, a literal found in
    grammar.opmap gets the type recorded there, and anything else
    (including the empty string) yields None.
    """
    if not value:
        # An empty literal has no first character to inspect; indexing it
        # used to raise IndexError.
        return None
    if value[0].isalpha():
        return token.NAME
    if value in grammar.opmap:
        return grammar.opmap[value]
    return None
 | 
						|
 | 
						|
 | 
						|
def pattern_convert(grammar, raw_node_info):
    """Builds a pytree Node or Leaf from raw node information.

    raw_node_info is a (type, value, context, children) tuple.  A Leaf is
    produced only when there are no children and the type is not one of
    the grammar's symbol numbers; otherwise a Node is produced.
    """
    node_type, value, context, children = raw_node_info
    is_leaf = not children and node_type not in grammar.number2symbol
    if is_leaf:
        return pytree.Leaf(node_type, value, context=context)
    return pytree.Node(node_type, children, context=context)
 | 
						|
 | 
						|
 | 
						|
def compile_pattern(pattern):
    """Compiles a pattern string using a newly created PatternCompiler."""
    compiler = PatternCompiler()
    return compiler.compile_pattern(pattern)
 |