mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	 7178b1bb68
			
		
	
	
		7178b1bb68
		
			
		
	
	
	
	
		
			
			(cherry picked from commit d10fef35c6)
Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
		
	
			
		
			
				
	
	
		
			209 lines
		
	
	
	
		
			7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			209 lines
		
	
	
	
		
			7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import contextlib
 | |
| from abc import abstractmethod
 | |
| 
 | |
| from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
 | |
| 
 | |
| from pegen import sccutils
 | |
| from pegen.grammar import (
 | |
|     Grammar,
 | |
|     Rule,
 | |
|     Rhs,
 | |
|     Alt,
 | |
|     NamedItem,
 | |
|     Plain,
 | |
|     NameLeaf,
 | |
|     Gather,
 | |
| )
 | |
| from pegen.grammar import GrammarError, GrammarVisitor
 | |
| 
 | |
| 
 | |
| class RuleCheckingVisitor(GrammarVisitor):
 | |
|     def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]):
 | |
|         self.rules = rules
 | |
|         self.tokens = tokens
 | |
| 
 | |
|     def visit_NameLeaf(self, node: NameLeaf) -> None:
 | |
|         if node.value not in self.rules and node.value not in self.tokens.values():
 | |
|             # TODO: Add line/col info to (leaf) nodes
 | |
|             raise GrammarError(f"Dangling reference to rule {node.value!r}")
 | |
| 
 | |
|     def visit_NamedItem(self, node: NamedItem) -> None:
 | |
|         if node.name and node.name.startswith("_"):
 | |
|             raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
 | |
|         self.visit(node.item)
 | |
| 
 | |
| 
 | |
| class ParserGenerator:
 | |
| 
 | |
|     callmakervisitor: GrammarVisitor
 | |
| 
 | |
|     def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]):
 | |
|         self.grammar = grammar
 | |
|         self.tokens = tokens
 | |
|         self.rules = grammar.rules
 | |
|         self.validate_rule_names()
 | |
|         if "trailer" not in grammar.metas and "start" not in self.rules:
 | |
|             raise GrammarError("Grammar without a trailer must have a 'start' rule")
 | |
|         checker = RuleCheckingVisitor(self.rules, self.tokens)
 | |
|         for rule in self.rules.values():
 | |
|             checker.visit(rule)
 | |
|         self.file = file
 | |
|         self.level = 0
 | |
|         compute_nullables(self.rules)
 | |
|         self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
 | |
|         self.todo = self.rules.copy()  # Rules to generate
 | |
|         self.counter = 0  # For name_rule()/name_loop()
 | |
|         self.keyword_counter = 499  # For keyword_type()
 | |
|         self.all_rules: Dict[str, Rule] = {}  # Rules + temporal rules
 | |
|         self._local_variable_stack: List[List[str]] = []
 | |
| 
 | |
|     def validate_rule_names(self) -> None:
 | |
|         for rule in self.rules:
 | |
|             if rule.startswith("_"):
 | |
|                 raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")
 | |
| 
 | |
|     @contextlib.contextmanager
 | |
|     def local_variable_context(self) -> Iterator[None]:
 | |
|         self._local_variable_stack.append([])
 | |
|         yield
 | |
|         self._local_variable_stack.pop()
 | |
| 
 | |
|     @property
 | |
|     def local_variable_names(self) -> List[str]:
 | |
|         return self._local_variable_stack[-1]
 | |
| 
 | |
|     @abstractmethod
 | |
|     def generate(self, filename: str) -> None:
 | |
|         raise NotImplementedError
 | |
| 
 | |
|     @contextlib.contextmanager
 | |
|     def indent(self) -> Iterator[None]:
 | |
|         self.level += 1
 | |
|         try:
 | |
|             yield
 | |
|         finally:
 | |
|             self.level -= 1
 | |
| 
 | |
|     def print(self, *args: object) -> None:
 | |
|         if not args:
 | |
|             print(file=self.file)
 | |
|         else:
 | |
|             print("    " * self.level, end="", file=self.file)
 | |
|             print(*args, file=self.file)
 | |
| 
 | |
|     def printblock(self, lines: str) -> None:
 | |
|         for line in lines.splitlines():
 | |
|             self.print(line)
 | |
| 
 | |
|     def collect_todo(self) -> None:
 | |
|         done: Set[str] = set()
 | |
|         while True:
 | |
|             alltodo = list(self.todo)
 | |
|             self.all_rules.update(self.todo)
 | |
|             todo = [i for i in alltodo if i not in done]
 | |
|             if not todo:
 | |
|                 break
 | |
|             for rulename in todo:
 | |
|                 self.todo[rulename].collect_todo(self)
 | |
|             done = set(alltodo)
 | |
| 
 | |
|     def keyword_type(self) -> int:
 | |
|         self.keyword_counter += 1
 | |
|         return self.keyword_counter
 | |
| 
 | |
|     def name_node(self, rhs: Rhs) -> str:
 | |
|         self.counter += 1
 | |
|         name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
 | |
|         self.todo[name] = Rule(name, None, rhs)
 | |
|         return name
 | |
| 
 | |
|     def name_loop(self, node: Plain, is_repeat1: bool) -> str:
 | |
|         self.counter += 1
 | |
|         if is_repeat1:
 | |
|             prefix = "_loop1_"
 | |
|         else:
 | |
|             prefix = "_loop0_"
 | |
|         name = f"{prefix}{self.counter}"  # TODO: It's ugly to signal via the name.
 | |
|         self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
 | |
|         return name
 | |
| 
 | |
|     def name_gather(self, node: Gather) -> str:
 | |
|         self.counter += 1
 | |
|         name = f"_gather_{self.counter}"
 | |
|         self.counter += 1
 | |
|         extra_function_name = f"_loop0_{self.counter}"
 | |
|         extra_function_alt = Alt(
 | |
|             [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem",
 | |
|         )
 | |
|         self.todo[extra_function_name] = Rule(
 | |
|             extra_function_name, None, Rhs([extra_function_alt]),
 | |
|         )
 | |
|         alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
 | |
|         self.todo[name] = Rule(name, None, Rhs([alt]),)
 | |
|         return name
 | |
| 
 | |
|     def dedupe(self, name: str) -> str:
 | |
|         origname = name
 | |
|         counter = 0
 | |
|         while name in self.local_variable_names:
 | |
|             counter += 1
 | |
|             name = f"{origname}_{counter}"
 | |
|         self.local_variable_names.append(name)
 | |
|         return name
 | |
| 
 | |
| 
 | |
| def compute_nullables(rules: Dict[str, Rule]) -> None:
 | |
|     """Compute which rules in a grammar are nullable.
 | |
| 
 | |
|     Thanks to TatSu (tatsu/leftrec.py) for inspiration.
 | |
|     """
 | |
|     for rule in rules.values():
 | |
|         rule.nullable_visit(rules)
 | |
| 
 | |
| 
 | |
| def compute_left_recursives(
 | |
|     rules: Dict[str, Rule]
 | |
| ) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
 | |
|     graph = make_first_graph(rules)
 | |
|     sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
 | |
|     for scc in sccs:
 | |
|         if len(scc) > 1:
 | |
|             for name in scc:
 | |
|                 rules[name].left_recursive = True
 | |
|             # Try to find a leader such that all cycles go through it.
 | |
|             leaders = set(scc)
 | |
|             for start in scc:
 | |
|                 for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
 | |
|                     # print("Cycle:", " -> ".join(cycle))
 | |
|                     leaders -= scc - set(cycle)
 | |
|                     if not leaders:
 | |
|                         raise ValueError(
 | |
|                             f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
 | |
|                         )
 | |
|             # print("Leaders:", leaders)
 | |
|             leader = min(leaders)  # Pick an arbitrary leader from the candidates.
 | |
|             rules[leader].leader = True
 | |
|         else:
 | |
|             name = min(scc)  # The only element.
 | |
|             if name in graph[name]:
 | |
|                 rules[name].left_recursive = True
 | |
|                 rules[name].leader = True
 | |
|     return graph, sccs
 | |
| 
 | |
| 
 | |
| def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
 | |
|     """Compute the graph of left-invocations.
 | |
| 
 | |
|     There's an edge from A to B if A may invoke B at its initial
 | |
|     position.
 | |
| 
 | |
|     Note that this requires the nullable flags to have been computed.
 | |
|     """
 | |
|     graph = {}
 | |
|     vertices: Set[str] = set()
 | |
|     for rulename, rhs in rules.items():
 | |
|         graph[rulename] = names = rhs.initial_names()
 | |
|         vertices |= names
 | |
|     for vertex in vertices:
 | |
|         graph.setdefault(vertex, set())
 | |
|     return graph
 |