| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  | import collections | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-03-04 07:26:13 +00:00
										 |  |  | class Grammar: | 
					
						
							|  |  |  |     """Pgen parsing tables class.
 | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     The instance variables are as follows: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     symbol2number -- a dict mapping symbol names to numbers.  Symbol | 
					
						
							|  |  |  |                      numbers are always 256 or higher, to distinguish | 
					
						
							|  |  |  |                      them from token numbers, which are between 0 and | 
					
						
							|  |  |  |                      255 (inclusive). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     number2symbol -- a dict mapping numbers to symbol names; | 
					
						
							|  |  |  |                      these two are each other's inverse. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     states        -- a list of DFAs, where each DFA is a list of | 
					
						
							|  |  |  |                      states, each state is a list of arcs, and each | 
					
						
							|  |  |  |                      arc is a (i, j) pair where i is a label and j is | 
					
						
							|  |  |  |                      a state number.  The DFA number is the index into | 
					
						
							|  |  |  |                      this list.  (This name is slightly confusing.) | 
					
						
							|  |  |  |                      Final states are represented by a special arc of | 
					
						
							|  |  |  |                      the form (0, j) where j is its own state number. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dfas          -- a dict mapping symbol numbers to (DFA, first) | 
					
						
							|  |  |  |                      pairs, where DFA is an item from the states list | 
					
						
							|  |  |  |                      above, and first is a set of tokens that can | 
					
						
							| 
									
										
										
										
											2019-03-04 07:26:13 +00:00
										 |  |  |                      begin this grammar rule. | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     labels        -- a list of (x, y) pairs where x is either a token | 
					
						
							|  |  |  |                      number or a symbol number, and y is either None | 
					
						
							|  |  |  |                      or a string; the strings are keywords.  The label | 
					
						
							|  |  |  |                      number is the index in this list; label numbers | 
					
						
							|  |  |  |                      are used to mark state transitions (arcs) in the | 
					
						
							|  |  |  |                      DFAs. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     start         -- the number of the grammar's start symbol. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     keywords      -- a dict mapping keyword strings to arc labels. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     tokens        -- a dict mapping token numbers to arc labels. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							|  |  |  |         self.symbol2number = collections.OrderedDict() | 
					
						
							|  |  |  |         self.number2symbol = collections.OrderedDict() | 
					
						
							|  |  |  |         self.states = [] | 
					
						
							|  |  |  |         self.dfas = collections.OrderedDict() | 
					
						
							|  |  |  |         self.labels = [(0, "EMPTY")] | 
					
						
							|  |  |  |         self.keywords = collections.OrderedDict() | 
					
						
							|  |  |  |         self.tokens = collections.OrderedDict() | 
					
						
							|  |  |  |         self.symbol2label = collections.OrderedDict() | 
					
						
							|  |  |  |         self.start = 256 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def produce_graminit_h(self, writer): | 
					
						
							|  |  |  |         writer("/* Generated by Parser/pgen */\n\n") | 
					
						
							|  |  |  |         for number, symbol in self.number2symbol.items(): | 
					
						
							|  |  |  |             writer("#define {} {}\n".format(symbol, number)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def produce_graminit_c(self, writer): | 
					
						
							|  |  |  |         writer("/* Generated by Parser/pgen */\n\n") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         writer('#include "grammar.h"\n') | 
					
						
							|  |  |  |         writer("grammar _PyParser_Grammar;\n") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.print_dfas(writer) | 
					
						
							|  |  |  |         self.print_labels(writer) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         writer("grammar _PyParser_Grammar = {\n") | 
					
						
							|  |  |  |         writer("    {n_dfas},\n".format(n_dfas=len(self.dfas))) | 
					
						
							|  |  |  |         writer("    dfas,\n") | 
					
						
							|  |  |  |         writer("    {{{n_labels}, labels}},\n".format(n_labels=len(self.labels))) | 
					
						
							|  |  |  |         writer("    {start_number}\n".format(start_number=self.start)) | 
					
						
							|  |  |  |         writer("};\n") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def print_labels(self, writer): | 
					
						
							|  |  |  |         writer( | 
					
						
							| 
									
										
										
										
											2019-04-23 12:29:57 +03:00
										 |  |  |             "static const label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels)) | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |         ) | 
					
						
							|  |  |  |         for label, name in self.labels: | 
					
						
							| 
									
										
										
										
											2019-03-04 07:26:13 +00:00
										 |  |  |             label_name = '"{}"'.format(name) if name is not None else 0 | 
					
						
							|  |  |  |             writer( | 
					
						
							|  |  |  |                 '    {{{label}, {label_name}}},\n'.format( | 
					
						
							|  |  |  |                     label=label, label_name=label_name | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2019-03-04 07:26:13 +00:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |         writer("};\n") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def print_dfas(self, writer): | 
					
						
							|  |  |  |         self.print_states(writer) | 
					
						
							| 
									
										
										
										
											2019-04-23 12:29:57 +03:00
										 |  |  |         writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas))) | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |         for dfaindex, dfa_elem in enumerate(self.dfas.items()): | 
					
						
							|  |  |  |             symbol, (dfa, first_sets) = dfa_elem | 
					
						
							|  |  |  |             writer( | 
					
						
							|  |  |  |                 '    {{{dfa_symbol}, "{symbol_name}", '.format( | 
					
						
							|  |  |  |                     dfa_symbol=symbol, symbol_name=self.number2symbol[symbol] | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2019-03-09 17:35:50 +02:00
										 |  |  |                 + "{n_states}, states_{dfa_index},\n".format( | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |                     n_states=len(dfa), dfa_index=dfaindex | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2019-03-04 07:26:13 +00:00
										 |  |  |                 + '     "' | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             bitset = bytearray((len(self.labels) >> 3) + 1) | 
					
						
							|  |  |  |             for token in first_sets: | 
					
						
							|  |  |  |                 bitset[token >> 3] |= 1 << (token & 7) | 
					
						
							|  |  |  |             for byte in bitset: | 
					
						
							|  |  |  |                 writer("\\%03o" % (byte & 0xFF)) | 
					
						
							|  |  |  |             writer('"},\n') | 
					
						
							|  |  |  |         writer("};\n") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def print_states(self, write): | 
					
						
							|  |  |  |         for dfaindex, dfa in enumerate(self.states): | 
					
						
							|  |  |  |             self.print_arcs(write, dfaindex, dfa) | 
					
						
							|  |  |  |             write( | 
					
						
							|  |  |  |                 "static state states_{dfa_index}[{n_states}] = {{\n".format( | 
					
						
							|  |  |  |                     dfa_index=dfaindex, n_states=len(dfa) | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             for stateindex, state in enumerate(dfa): | 
					
						
							|  |  |  |                 narcs = len(state) | 
					
						
							|  |  |  |                 write( | 
					
						
							|  |  |  |                     "    {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format( | 
					
						
							|  |  |  |                         n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             write("};\n") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def print_arcs(self, write, dfaindex, states): | 
					
						
							|  |  |  |         for stateindex, state in enumerate(states): | 
					
						
							|  |  |  |             narcs = len(state) | 
					
						
							|  |  |  |             write( | 
					
						
							| 
									
										
										
										
											2019-04-23 12:29:57 +03:00
										 |  |  |                 "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format( | 
					
						
							| 
									
										
										
										
											2019-03-01 15:34:44 -08:00
										 |  |  |                     dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             for a, b in state: | 
					
						
							|  |  |  |                 write( | 
					
						
							|  |  |  |                     "    {{{from_label}, {to_state}}},\n".format( | 
					
						
							|  |  |  |                         from_label=a, to_state=b | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             write("};\n") |