gh-124889: Rework Python generator cache (#125816)

This commit is contained in:
Mikhail Efimov 2024-10-22 11:42:56 +03:00 committed by GitHub
parent 4efe64aa56
commit c1bdbe84c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,6 +1,6 @@
import os.path
import token
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple
from pegen import grammar
from pegen.grammar import (
@ -93,7 +93,7 @@ def visit_Forced(self, node: Forced) -> bool:
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
self.cache: Dict[str, Tuple[str, str]] = {}
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value
@ -110,16 +110,6 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
    """Return the ("literal", call) pair for a string leaf.

    The call expression is ``self.expect(...)`` with ``node.value``
    interpolated verbatim; no quoting is added here.
    """
    expect_call = f"self.expect({node.value})"
    return ("literal", expect_call)
# Resolve an Rhs node to a (name, call expression) pair, memoised in
# self.cache under the node object itself.
# NOTE(review): a second ``visit_Rhs`` definition appears further down in this
# view; if both sit in the same class body, the later one replaces this one.
# This looks like the pre-refactor version shown by the diff -- confirm.
# NOTE(review): this version keys self.cache by node, while the cache is also
# annotated as ``Dict[str, Tuple[str, str]]`` in __init__ -- confirm which is live.
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
# Already resolved for this node: reuse the stored pair.
if node in self.cache:
return self.cache[node]
# Exactly one alternative holding exactly one item: store whatever
# self.visit returns for that item.
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
self.cache[node] = self.visit(node.alts[0].items[0])
else:
# Otherwise take the rule name returned by
# self.gen.artificial_rule_from_rhs and build a ``self.<name>()`` call.
name = self.gen.artificial_rule_from_rhs(node)
self.cache[node] = name, f"self.{name}()"
return self.cache[node]
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
name, call = self.visit(node.item)
if node.name:
@ -151,26 +141,57 @@ def visit_Opt(self, node: Opt) -> Tuple[str, str]:
else:
return "opt", f"{call},"
def _generate_artificial_rule_call(
self,
node: Any,
prefix: str,
call_by_name_func: Callable[[str], str],
rule_generation_func: Callable[[], str],
) -> Tuple[str, str]:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
if key in self.cache:
return self.cache[key]
name = rule_generation_func()
call = call_by_name_func(name)
self.cache[key] = name, call
return self.cache[key]
def visit_Rhs(self, node: Rhs) -> Tuple[str, str]:
    """Return the (name, call expression) pair for a right-hand side.

    When the Rhs has exactly one alternative with exactly one item, the
    result is whatever ``self.visit`` returns for that item. In every other
    case the pair comes from ``_generate_artificial_rule_call`` under the
    "rhs" prefix, with the rule name supplied by
    ``self.gen.artificial_rule_from_rhs``.
    """
    alternatives = node.alts
    if len(alternatives) == 1:
        sole_items = alternatives[0].items
        if len(sole_items) == 1:
            return self.visit(sole_items[0])

    def build_call(rule_name: str) -> str:
        return f"self.{rule_name}()"

    def build_rule() -> str:
        return self.gen.artificial_rule_from_rhs(node)

    return self._generate_artificial_rule_call(node, "rhs", build_call, build_rule)
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
    """Return the (rule name, call expression) pair for a Repeat0 node.

    Caching is delegated to ``_generate_artificial_rule_call`` under the
    "repeat0" prefix, the same string-keyed scheme ``visit_Rhs`` uses. The
    stale node-keyed lookup that ran first made that call unreachable and
    stored node objects in the string-keyed cache, so it is gone.

    The rule name comes from ``self.gen.artificial_rule_from_repeat`` with
    ``is_repeat1=False``.
    """
    return self._generate_artificial_rule_call(
        node,
        "repeat0",
        lambda name: f"self.{name}(),",  # Also a trailing comma!
        lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False),
    )
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
    """Return the (rule name, call expression) pair for a Repeat1 node.

    Caching is delegated to ``_generate_artificial_rule_call`` under the
    "repeat1" prefix, the same string-keyed scheme ``visit_Rhs`` uses. The
    stale node-keyed lookup that ran first made that call unreachable and
    stored node objects in the string-keyed cache, so it is gone.

    The rule name comes from ``self.gen.artificial_rule_from_repeat`` with
    ``is_repeat1=True``.
    """
    return self._generate_artificial_rule_call(
        node,
        "repeat1",
        lambda name: f"self.{name}()",  # But no trailing comma here!
        lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True),
    )
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
    """Return the (rule name, call expression) pair for a Gather node.

    Caching is delegated to ``_generate_artificial_rule_call`` under the
    "gather" prefix, the same string-keyed scheme ``visit_Rhs`` uses. The
    stale node-keyed lookup that ran first made that call unreachable and
    stored node objects in the string-keyed cache, so it is gone.

    The rule name comes from ``self.gen.artificial_rule_from_gather``.
    """
    return self._generate_artificial_rule_call(
        node,
        "gather",
        lambda name: f"self.{name}()",  # No trailing comma here either!
        lambda: self.gen.artificial_rule_from_gather(node),
    )
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
    """Return whatever ``self.visit`` yields for the group's ``rhs``."""
    inner_rhs = node.rhs
    return self.visit(inner_rhs)