mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r67384 | benjamin.peterson | 2008-11-25 16:13:31 -0600 (Tue, 25 Nov 2008) | 4 lines don't duplicate calls to start_tree() RefactoringTool.pre_order values now holds a list of the fixers while pre_order_mapping holds the dict. ........ r67386 | benjamin.peterson | 2008-11-25 16:44:52 -0600 (Tue, 25 Nov 2008) | 1 line #4423 fix_imports was still replacing usage of a module if attributes were being used ........ r67387 | benjamin.peterson | 2008-11-25 16:47:54 -0600 (Tue, 25 Nov 2008) | 1 line fix broken test ........ r67389 | benjamin.peterson | 2008-11-25 17:13:17 -0600 (Tue, 25 Nov 2008) | 1 line remove compatibility code; we only cater to 2.5+ ........ r67390 | benjamin.peterson | 2008-11-25 22:03:36 -0600 (Tue, 25 Nov 2008) | 1 line fix #3994; the usage of changed imports was fixed in nested cases ........ r67392 | benjamin.peterson | 2008-11-26 11:11:40 -0600 (Wed, 26 Nov 2008) | 1 line simpilfy and comment fix_imports ........ r67399 | benjamin.peterson | 2008-11-26 11:47:03 -0600 (Wed, 26 Nov 2008) | 1 line remove more compatibility code ........ r67400 | benjamin.peterson | 2008-11-26 12:07:41 -0600 (Wed, 26 Nov 2008) | 1 line set svn:ignore ........ r67403 | benjamin.peterson | 2008-11-26 13:11:11 -0600 (Wed, 26 Nov 2008) | 1 line wrap import ........ r67404 | benjamin.peterson | 2008-11-26 13:29:49 -0600 (Wed, 26 Nov 2008) | 1 line build the fix_imports pattern in compile_pattern, so MAPPING can be changed and reflected in the pattern ........ r67405 | benjamin.peterson | 2008-11-26 14:01:24 -0600 (Wed, 26 Nov 2008) | 1 line stop ugly messages about runtime errors being from printed ........ r67426 | benjamin.peterson | 2008-11-28 16:01:40 -0600 (Fri, 28 Nov 2008) | 5 lines don't replace a module name if it is in the middle of a attribute lookup This fix also stops module names from being replaced if they are not in an attribute lookup. ........
		
			
				
	
	
		
			201 lines
		
	
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			201 lines
		
	
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | 
						|
# Licensed to PSF under a Contributor Agreement.
 | 
						|
 | 
						|
"""Parser engine for the grammar tables generated by pgen.
 | 
						|
 | 
						|
The grammar table must be loaded first.
 | 
						|
 | 
						|
See Parser/parser.c in the Python distribution for additional info on
 | 
						|
how this parsing engine works.
 | 
						|
 | 
						|
"""
 | 
						|
 | 
						|
# Local imports
 | 
						|
from . import token
 | 
						|
 | 
						|
class ParseError(Exception):
    """Exception to signal the parser is stuck.

    Attributes:
        msg: short description of the failure (the parser uses
            "bad input", "too much input" and "bad token").
        type: type number of the offending token.
        value: string value of the offending token.
        context: opaque context of the offending token, as passed to
            Parser.addtoken().
    """

    def __init__(self, msg, type, value, context):
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

    def __reduce__(self):
        # Exception.args only holds the single formatted message, so the
        # default reduction would call __init__ with one argument and fail.
        # Rebuild from the four original constructor arguments instead so
        # that instances survive copy and pickle round trips.
        return type(self), (self.msg, self.type, self.value, self.context)
 | 
						|
 | 
						|
class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    The set of all NAME token values seen since the last setup() call
    is available as the used_names instance variable; it is also
    attached to the root node when that node accepts attributes.

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.  A converter may return None to drop a node from the tree.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # Without a converter, nodes pass through unchanged (raw tuples).
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        # Aliased to self.rootnode.used_names in pop() when the root node
        # accepts attributes.
        self.used_names = set()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program.

        Raises ParseError if the token cannot be classified, if no
        transition exists for it, or if it arrives after the start
        symbol has already been completed.
        """
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, itsdfa, newstate, context)
                        break # To continue the outer while loop
            else:
                # No arc matched the token in this state.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label.  (Internal)

        NAME tokens are recorded in self.used_names and checked against
        the grammar's keywords first.  Raises ParseError("bad token")
        when the grammar has no label for the token type.
        """
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token.  (Internal)

        The converted token node is appended to the children of the
        node on top of the stack (unless the converter returned None),
        and the top stack entry advances to newstate.
        """
        dfa, state, node = self.stack[-1]
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal.  (Internal)

        The current top entry is advanced to newstate (the state to
        resume in once the nonterminal is popped) and a fresh entry for
        the nonterminal is pushed in state 0 with an empty child list.
        """
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal.  (Internal)

        The popped node is converted and, unless the converter returned
        None, appended to its parent's children.  When the stack becomes
        empty the converted node is stored as self.rootnode instead.
        """
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                try:
                    self.rootnode.used_names = self.used_names
                except AttributeError:
                    # Root nodes that cannot carry attributes (e.g. the raw
                    # tuples produced when no converter is given) are left
                    # as they are; the names stay on self.used_names.
                    pass
 |