| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | import contextlib | 
					
						
							|  |  |  | from abc import abstractmethod | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from pegen import sccutils | 
					
						
							|  |  |  | from pegen.grammar import ( | 
					
						
							|  |  |  |     Grammar, | 
					
						
							|  |  |  |     Rule, | 
					
						
							|  |  |  |     Rhs, | 
					
						
							|  |  |  |     Alt, | 
					
						
							|  |  |  |     NamedItem, | 
					
						
							|  |  |  |     Plain, | 
					
						
							|  |  |  |     NameLeaf, | 
					
						
							|  |  |  |     Gather, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | from pegen.grammar import GrammarError, GrammarVisitor | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class RuleCheckingVisitor(GrammarVisitor): | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |     def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]): | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         self.rules = rules | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |         self.tokens = tokens | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def visit_NameLeaf(self, node: NameLeaf) -> None: | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |         if node.value not in self.rules and node.value not in self.tokens.values(): | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             # TODO: Add line/col info to (leaf) nodes | 
					
						
							|  |  |  |             raise GrammarError(f"Dangling reference to rule {node.value!r}") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-21 22:09:43 +01:00
										 |  |  |     def visit_NamedItem(self, node: NamedItem) -> None: | 
					
						
							| 
									
										
										
										
											2020-05-10 05:34:50 +01:00
										 |  |  |         if node.name and node.name.startswith("_"): | 
					
						
							|  |  |  |             raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'") | 
					
						
							|  |  |  |         self.visit(node.item) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | class ParserGenerator: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     callmakervisitor: GrammarVisitor | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |     def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]): | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         self.grammar = grammar | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |         self.tokens = tokens | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         self.rules = grammar.rules | 
					
						
							| 
									
										
										
										
											2020-05-10 05:34:50 +01:00
										 |  |  |         self.validate_rule_names() | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         if "trailer" not in grammar.metas and "start" not in self.rules: | 
					
						
							|  |  |  |             raise GrammarError("Grammar without a trailer must have a 'start' rule") | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |         checker = RuleCheckingVisitor(self.rules, self.tokens) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         for rule in self.rules.values(): | 
					
						
							|  |  |  |             checker.visit(rule) | 
					
						
							|  |  |  |         self.file = file | 
					
						
							|  |  |  |         self.level = 0 | 
					
						
							|  |  |  |         compute_nullables(self.rules) | 
					
						
							|  |  |  |         self.first_graph, self.first_sccs = compute_left_recursives(self.rules) | 
					
						
							|  |  |  |         self.todo = self.rules.copy()  # Rules to generate | 
					
						
							|  |  |  |         self.counter = 0  # For name_rule()/name_loop() | 
					
						
							|  |  |  |         self.keyword_counter = 499  # For keyword_type() | 
					
						
							| 
									
										
										
										
											2020-05-01 12:32:26 +01:00
										 |  |  |         self.all_rules: Dict[str, Rule] = {}  # Rules + temporal rules | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |         self._local_variable_stack: List[List[str]] = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-21 22:09:43 +01:00
										 |  |  |     def validate_rule_names(self) -> None: | 
					
						
							| 
									
										
										
										
											2020-05-10 05:34:50 +01:00
										 |  |  |         for rule in self.rules: | 
					
						
							|  |  |  |             if rule.startswith("_"): | 
					
						
							|  |  |  |                 raise GrammarError(f"Rule names cannot start with underscore: '{rule}'") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |     @contextlib.contextmanager | 
					
						
							|  |  |  |     def local_variable_context(self) -> Iterator[None]: | 
					
						
							|  |  |  |         self._local_variable_stack.append([]) | 
					
						
							|  |  |  |         yield | 
					
						
							|  |  |  |         self._local_variable_stack.pop() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @property | 
					
						
							|  |  |  |     def local_variable_names(self) -> List[str]: | 
					
						
							|  |  |  |         return self._local_variable_stack[-1] | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     @abstractmethod | 
					
						
							|  |  |  |     def generate(self, filename: str) -> None: | 
					
						
							|  |  |  |         raise NotImplementedError | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @contextlib.contextmanager | 
					
						
							|  |  |  |     def indent(self) -> Iterator[None]: | 
					
						
							|  |  |  |         self.level += 1 | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             yield | 
					
						
							|  |  |  |         finally: | 
					
						
							|  |  |  |             self.level -= 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def print(self, *args: object) -> None: | 
					
						
							|  |  |  |         if not args: | 
					
						
							|  |  |  |             print(file=self.file) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             print("    " * self.level, end="", file=self.file) | 
					
						
							|  |  |  |             print(*args, file=self.file) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def printblock(self, lines: str) -> None: | 
					
						
							|  |  |  |         for line in lines.splitlines(): | 
					
						
							|  |  |  |             self.print(line) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def collect_todo(self) -> None: | 
					
						
							|  |  |  |         done: Set[str] = set() | 
					
						
							|  |  |  |         while True: | 
					
						
							|  |  |  |             alltodo = list(self.todo) | 
					
						
							| 
									
										
										
										
											2020-05-01 12:32:26 +01:00
										 |  |  |             self.all_rules.update(self.todo) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             todo = [i for i in alltodo if i not in done] | 
					
						
							|  |  |  |             if not todo: | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  |             for rulename in todo: | 
					
						
							|  |  |  |                 self.todo[rulename].collect_todo(self) | 
					
						
							|  |  |  |             done = set(alltodo) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def keyword_type(self) -> int: | 
					
						
							|  |  |  |         self.keyword_counter += 1 | 
					
						
							|  |  |  |         return self.keyword_counter | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def name_node(self, rhs: Rhs) -> str: | 
					
						
							|  |  |  |         self.counter += 1 | 
					
						
							|  |  |  |         name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name. | 
					
						
							|  |  |  |         self.todo[name] = Rule(name, None, rhs) | 
					
						
							|  |  |  |         return name | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def name_loop(self, node: Plain, is_repeat1: bool) -> str: | 
					
						
							|  |  |  |         self.counter += 1 | 
					
						
							|  |  |  |         if is_repeat1: | 
					
						
							|  |  |  |             prefix = "_loop1_" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             prefix = "_loop0_" | 
					
						
							|  |  |  |         name = f"{prefix}{self.counter}"  # TODO: It's ugly to signal via the name. | 
					
						
							|  |  |  |         self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) | 
					
						
							|  |  |  |         return name | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def name_gather(self, node: Gather) -> str: | 
					
						
							|  |  |  |         self.counter += 1 | 
					
						
							|  |  |  |         name = f"_gather_{self.counter}" | 
					
						
							|  |  |  |         self.counter += 1 | 
					
						
							|  |  |  |         extra_function_name = f"_loop0_{self.counter}" | 
					
						
							|  |  |  |         extra_function_alt = Alt( | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |             [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem", | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         ) | 
					
						
							|  |  |  |         self.todo[extra_function_name] = Rule( | 
					
						
							|  |  |  |             extra_function_name, None, Rhs([extra_function_alt]), | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |         alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         self.todo[name] = Rule(name, None, Rhs([alt]),) | 
					
						
							|  |  |  |         return name | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |     def dedupe(self, name: str) -> str: | 
					
						
							|  |  |  |         origname = name | 
					
						
							|  |  |  |         counter = 0 | 
					
						
							|  |  |  |         while name in self.local_variable_names: | 
					
						
							|  |  |  |             counter += 1 | 
					
						
							|  |  |  |             name = f"{origname}_{counter}" | 
					
						
							|  |  |  |         self.local_variable_names.append(name) | 
					
						
							|  |  |  |         return name | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def compute_nullables(rules: Dict[str, Rule]) -> None: | 
					
						
							|  |  |  |     """Compute which rules in a grammar are nullable.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Thanks to TatSu (tatsu/leftrec.py) for inspiration. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     for rule in rules.values(): | 
					
						
							|  |  |  |         rule.nullable_visit(rules) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def compute_left_recursives( | 
					
						
							|  |  |  |     rules: Dict[str, Rule] | 
					
						
							|  |  |  | ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]: | 
					
						
							|  |  |  |     graph = make_first_graph(rules) | 
					
						
							|  |  |  |     sccs = list(sccutils.strongly_connected_components(graph.keys(), graph)) | 
					
						
							|  |  |  |     for scc in sccs: | 
					
						
							|  |  |  |         if len(scc) > 1: | 
					
						
							|  |  |  |             for name in scc: | 
					
						
							|  |  |  |                 rules[name].left_recursive = True | 
					
						
							|  |  |  |             # Try to find a leader such that all cycles go through it. | 
					
						
							|  |  |  |             leaders = set(scc) | 
					
						
							|  |  |  |             for start in scc: | 
					
						
							|  |  |  |                 for cycle in sccutils.find_cycles_in_scc(graph, scc, start): | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |                     # print("Cycle:", " -> ".join(cycle)) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |                     leaders -= scc - set(cycle) | 
					
						
							|  |  |  |                     if not leaders: | 
					
						
							|  |  |  |                         raise ValueError( | 
					
						
							|  |  |  |                             f"SCC {scc} has no leadership candidate (no element is included in all cycles)" | 
					
						
							|  |  |  |                         ) | 
					
						
							| 
									
										
										
										
											2020-04-29 10:42:21 +01:00
										 |  |  |             # print("Leaders:", leaders) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |             leader = min(leaders)  # Pick an arbitrary leader from the candidates. | 
					
						
							|  |  |  |             rules[leader].leader = True | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             name = min(scc)  # The only element. | 
					
						
							|  |  |  |             if name in graph[name]: | 
					
						
							|  |  |  |                 rules[name].left_recursive = True | 
					
						
							|  |  |  |                 rules[name].leader = True | 
					
						
							|  |  |  |     return graph, sccs | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: | 
					
						
							|  |  |  |     """Compute the graph of left-invocations.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     There's an edge from A to B if A may invoke B at its initial | 
					
						
							|  |  |  |     position. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Note that this requires the nullable flags to have been computed. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     graph = {} | 
					
						
							|  |  |  |     vertices: Set[str] = set() | 
					
						
							|  |  |  |     for rulename, rhs in rules.items(): | 
					
						
							|  |  |  |         graph[rulename] = names = rhs.initial_names() | 
					
						
							|  |  |  |         vertices |= names | 
					
						
							|  |  |  |     for vertex in vertices: | 
					
						
							|  |  |  |         graph.setdefault(vertex, set()) | 
					
						
							|  |  |  |     return graph |