| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. | 
					
						
							|  |  |  | # Licensed to PSF under a Contributor Agreement. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """This module defines the data structures used to represent a grammar.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | These are a bit arcane because they are derived from the data | 
					
						
							|  |  |  | structures used by Python's 'pgen' parser generator. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | There's also a table here mapping operators to their names in the | 
					
						
							|  |  |  | token module; the Python tokenize module reports all operators as the | 
					
						
							|  |  |  | fallback token code OP, but the parser needs the actual token code. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Python imports | 
					
						
							|  |  |  | import pickle | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Local imports | 
					
						
							|  |  |  | from . import token, tokenize | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Grammar(object): | 
					
						
							| 
									
										
										
										
											2013-03-11 17:57:08 -04:00
										 |  |  |     """Pgen parsing tables conversion class.
 | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     Once initialized, this class supplies the grammar tables for the | 
					
						
							|  |  |  |     parsing engine implemented by parse.py.  The parsing engine | 
					
						
							|  |  |  |     accesses the instance variables directly.  The class here does not | 
					
						
							|  |  |  |     provide initialization of the tables; several subclasses exist to | 
					
						
							|  |  |  |     do this (see the conv and pgen modules). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The load() method reads the tables from a pickle file, which is | 
					
						
							|  |  |  |     much faster than the other ways offered by subclasses.  The pickle | 
					
						
							|  |  |  |     file is written by calling dump() (after loading the grammar | 
					
						
							|  |  |  |     tables using a subclass).  The report() method prints a readable | 
					
						
							|  |  |  |     representation of the tables to stdout, for debugging. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     The instance variables are as follows: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     symbol2number -- a dict mapping symbol names to numbers.  Symbol | 
					
						
							|  |  |  |                      numbers are always 256 or higher, to distinguish | 
					
						
							|  |  |  |                      them from token numbers, which are between 0 and | 
					
						
							|  |  |  |                      255 (inclusive). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     number2symbol -- a dict mapping numbers to symbol names; | 
					
						
							|  |  |  |                      these two are each other's inverse. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     states        -- a list of DFAs, where each DFA is a list of | 
					
						
							| 
									
										
										
										
											2013-03-11 17:57:08 -04:00
										 |  |  |                      states, each state is a list of arcs, and each | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |                      arc is a (i, j) pair where i is a label and j is | 
					
						
							|  |  |  |                      a state number.  The DFA number is the index into | 
					
						
							|  |  |  |                      this list.  (This name is slightly confusing.) | 
					
						
							|  |  |  |                      Final states are represented by a special arc of | 
					
						
							|  |  |  |                      the form (0, j) where j is its own state number. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dfas          -- a dict mapping symbol numbers to (DFA, first) | 
					
						
							|  |  |  |                      pairs, where DFA is an item from the states list | 
					
						
							|  |  |  |                      above, and first is a set of tokens that can | 
					
						
							|  |  |  |                      begin this grammar rule (represented by a dict | 
					
						
							|  |  |  |                      whose values are always 1). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     labels        -- a list of (x, y) pairs where x is either a token | 
					
						
							|  |  |  |                      number or a symbol number, and y is either None | 
					
						
							|  |  |  |                      or a string; the strings are keywords.  The label | 
					
						
							|  |  |  |                      number is the index in this list; label numbers | 
					
						
							|  |  |  |                      are used to mark state transitions (arcs) in the | 
					
						
							|  |  |  |                      DFAs. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     start         -- the number of the grammar's start symbol. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     keywords      -- a dict mapping keyword strings to arc labels. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     tokens        -- a dict mapping token numbers to arc labels. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.symbol2number = {} | 
					
						
							|  |  |  |         self.number2symbol = {} | 
					
						
							|  |  |  |         self.states = [] | 
					
						
							|  |  |  |         self.dfas = {} | 
					
						
							|  |  |  |         self.labels = [(0, "EMPTY")] | 
					
						
							|  |  |  |         self.keywords = {} | 
					
						
							|  |  |  |         self.tokens = {} | 
					
						
							|  |  |  |         self.symbol2label = {} | 
					
						
							|  |  |  |         self.start = 256 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def dump(self, filename): | 
					
						
							|  |  |  |         """Dump the grammar tables to a pickle file.""" | 
					
						
							| 
									
										
										
										
											2013-02-12 02:04:27 +01:00
										 |  |  |         with open(filename, "wb") as f: | 
					
						
							|  |  |  |             pickle.dump(self.__dict__, f, 2) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def load(self, filename): | 
					
						
							|  |  |  |         """Load the grammar tables from a pickle file.""" | 
					
						
							| 
									
										
										
										
											2013-02-12 02:04:27 +01:00
										 |  |  |         with open(filename, "rb") as f: | 
					
						
							|  |  |  |             d = pickle.load(f) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |         self.__dict__.update(d) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
											  
											
												Merged revisions 74114 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
................
  r74114 | benjamin.peterson | 2009-07-20 10:33:09 -0500 (Mon, 20 Jul 2009) | 110 lines
  Merged revisions 73771,73811,73840,73842,73848-73849,73861,73957-73960,73964-73969,73972-73974,73977,73981,73984,74065,74113 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
  ........
    r73771 | benjamin.peterson | 2009-07-02 10:56:55 -0500 (Thu, 02 Jul 2009) | 1 line
    force the imports fixer to be run after the import one #6400
  ........
    r73811 | benjamin.peterson | 2009-07-03 09:03:14 -0500 (Fri, 03 Jul 2009) | 1 line
    check for sep, not pathsep when looking for a subpackage #6408
  ........
    r73840 | benjamin.peterson | 2009-07-04 09:52:28 -0500 (Sat, 04 Jul 2009) | 1 line
    don't print diffs by default; it's annoying
  ........
    r73842 | benjamin.peterson | 2009-07-04 09:58:46 -0500 (Sat, 04 Jul 2009) | 1 line
    complain when not showing diffs or writing
  ........
    r73848 | alexandre.vassalotti | 2009-07-04 23:38:19 -0500 (Sat, 04 Jul 2009) | 2 lines
    Fix test_refactor_stdin to handle print_output() method with 4 arguments.
  ........
    r73849 | alexandre.vassalotti | 2009-07-04 23:43:18 -0500 (Sat, 04 Jul 2009) | 5 lines
    Issue 2370: Add fixer for the removal of operator.isCallable() and
    operator.sequenceIncludes().
    Patch contributed by Jeff Balogh (and updated by me).
  ........
    r73861 | benjamin.peterson | 2009-07-05 09:15:53 -0500 (Sun, 05 Jul 2009) | 1 line
    cleanup and use unicode where appropiate
  ........
    r73957 | benjamin.peterson | 2009-07-11 15:49:56 -0500 (Sat, 11 Jul 2009) | 1 line
    fix calls to str() with unicode()
  ........
    r73958 | benjamin.peterson | 2009-07-11 15:51:51 -0500 (Sat, 11 Jul 2009) | 1 line
    more str() -> unicode()
  ........
    r73959 | benjamin.peterson | 2009-07-11 16:40:08 -0500 (Sat, 11 Jul 2009) | 1 line
    add tests for refactor_dir()
  ........
    r73960 | benjamin.peterson | 2009-07-11 16:44:32 -0500 (Sat, 11 Jul 2009) | 1 line
    don't parse files just because they end with 'py' (no dot)
  ........
    r73964 | benjamin.peterson | 2009-07-11 17:30:15 -0500 (Sat, 11 Jul 2009) | 1 line
    simplify
  ........
    r73965 | benjamin.peterson | 2009-07-11 17:31:30 -0500 (Sat, 11 Jul 2009) | 1 line
    remove usage of get_prefix()
  ........
    r73966 | benjamin.peterson | 2009-07-11 17:33:35 -0500 (Sat, 11 Jul 2009) | 1 line
    revert unintended change in 73965
  ........
    r73967 | benjamin.peterson | 2009-07-11 17:34:44 -0500 (Sat, 11 Jul 2009) | 1 line
    avoid expensive checks and assume the node did change
  ........
    r73968 | benjamin.peterson | 2009-07-11 20:46:46 -0500 (Sat, 11 Jul 2009) | 1 line
    use a regular dict for the heads to avoid adding lists in the loop
  ........
    r73969 | benjamin.peterson | 2009-07-11 20:50:43 -0500 (Sat, 11 Jul 2009) | 1 line
    prefix headnode functions with '_'
  ........
    r73972 | benjamin.peterson | 2009-07-11 21:25:45 -0500 (Sat, 11 Jul 2009) | 1 line
    try to make the head node dict as sparse as possible
  ........
    r73973 | benjamin.peterson | 2009-07-11 21:59:49 -0500 (Sat, 11 Jul 2009) | 1 line
    a better idea; add an option to *not* print diffs
  ........
    r73974 | benjamin.peterson | 2009-07-11 22:00:29 -0500 (Sat, 11 Jul 2009) | 1 line
    add space
  ........
    r73977 | benjamin.peterson | 2009-07-12 10:16:07 -0500 (Sun, 12 Jul 2009) | 1 line
    update get_headnode_dict tests for recent changes
  ........
    r73981 | benjamin.peterson | 2009-07-12 12:06:39 -0500 (Sun, 12 Jul 2009) | 4 lines
    detect when "from __future__ import print_function" is given
    Deprecate the 'print_function' option and the -p flag
  ........
    r73984 | benjamin.peterson | 2009-07-12 16:16:37 -0500 (Sun, 12 Jul 2009) | 1 line
    add tests for Call; thanks Joe Amenta
  ........
    r74065 | benjamin.peterson | 2009-07-17 12:52:49 -0500 (Fri, 17 Jul 2009) | 1 line
    pathname2url and url2pathname are in urllib.request not urllib.parse #6496
  ........
    r74113 | benjamin.peterson | 2009-07-20 08:56:57 -0500 (Mon, 20 Jul 2009) | 1 line
    fix deprecation warnings in tests
  ........
................
											
										 
											2009-07-20 16:42:03 +00:00
										 |  |  |     def copy(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Copy the grammar. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         new = self.__class__() | 
					
						
							|  |  |  |         for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", | 
					
						
							|  |  |  |                           "tokens", "symbol2label"): | 
					
						
							|  |  |  |             setattr(new, dict_attr, getattr(self, dict_attr).copy()) | 
					
						
							|  |  |  |         new.labels = self.labels[:] | 
					
						
							|  |  |  |         new.states = self.states[:] | 
					
						
							|  |  |  |         new.start = self.start | 
					
						
							|  |  |  |         return new | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |     def report(self): | 
					
						
							|  |  |  |         """Dump the grammar tables to standard output, for debugging.""" | 
					
						
							|  |  |  |         from pprint import pprint | 
					
						
							| 
									
										
										
										
											2008-03-19 05:33:36 +00:00
										 |  |  |         print("s2n") | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |         pprint(self.symbol2number) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:33:36 +00:00
										 |  |  |         print("n2s") | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |         pprint(self.number2symbol) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:33:36 +00:00
										 |  |  |         print("states") | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |         pprint(self.states) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:33:36 +00:00
										 |  |  |         print("dfas") | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |         pprint(self.dfas) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:33:36 +00:00
										 |  |  |         print("labels") | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  |         pprint(self.labels) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:33:36 +00:00
										 |  |  |         print("start", self.start) | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Map from operator to number (since tokenize doesn't do this) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | opmap_raw = """
 | 
					
						
							|  |  |  | ( LPAR | 
					
						
							|  |  |  | ) RPAR | 
					
						
							|  |  |  | [ LSQB | 
					
						
							|  |  |  | ] RSQB | 
					
						
							|  |  |  | : COLON | 
					
						
							|  |  |  | , COMMA | 
					
						
							|  |  |  | ; SEMI | 
					
						
							|  |  |  | + PLUS | 
					
						
							|  |  |  | - MINUS | 
					
						
							|  |  |  | * STAR | 
					
						
							|  |  |  | / SLASH | 
					
						
							|  |  |  | | VBAR | 
					
						
							|  |  |  | & AMPER | 
					
						
							|  |  |  | < LESS | 
					
						
							|  |  |  | > GREATER | 
					
						
							|  |  |  | = EQUAL | 
					
						
							|  |  |  | . DOT | 
					
						
							|  |  |  | % PERCENT | 
					
						
							|  |  |  | ` BACKQUOTE | 
					
						
							|  |  |  | { LBRACE | 
					
						
							|  |  |  | } RBRACE | 
					
						
							|  |  |  | @ AT | 
					
						
							| 
									
										
										
										
											2014-04-10 00:12:47 -04:00
										 |  |  | @= ATEQUAL | 
					
						
							| 
									
										
										
										
											2008-03-19 05:04:44 +00:00
										 |  |  | == EQEQUAL | 
					
						
							|  |  |  | != NOTEQUAL | 
					
						
							|  |  |  | <> NOTEQUAL | 
					
						
							|  |  |  | <= LESSEQUAL | 
					
						
							|  |  |  | >= GREATEREQUAL | 
					
						
							|  |  |  | ~ TILDE | 
					
						
							|  |  |  | ^ CIRCUMFLEX | 
					
						
							|  |  |  | << LEFTSHIFT | 
					
						
							|  |  |  | >> RIGHTSHIFT | 
					
						
							|  |  |  | ** DOUBLESTAR | 
					
						
							|  |  |  | += PLUSEQUAL | 
					
						
							|  |  |  | -= MINEQUAL | 
					
						
							|  |  |  | *= STAREQUAL | 
					
						
							|  |  |  | /= SLASHEQUAL | 
					
						
							|  |  |  | %= PERCENTEQUAL | 
					
						
							|  |  |  | &= AMPEREQUAL | 
					
						
							|  |  |  | |= VBAREQUAL | 
					
						
							|  |  |  | ^= CIRCUMFLEXEQUAL | 
					
						
							|  |  |  | <<= LEFTSHIFTEQUAL | 
					
						
							|  |  |  | >>= RIGHTSHIFTEQUAL | 
					
						
							|  |  |  | **= DOUBLESTAREQUAL | 
					
						
							|  |  |  | // DOUBLESLASH | 
					
						
							|  |  |  | //= DOUBLESLASHEQUAL | 
					
						
							|  |  |  | -> RARROW | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | opmap = {} | 
					
						
							|  |  |  | for line in opmap_raw.splitlines(): | 
					
						
							|  |  |  |     if line: | 
					
						
							|  |  |  |         op, name = line.split() | 
					
						
							|  |  |  |         opmap[op] = getattr(token, name) |