2026-01-13 13:21:59 +01:00
|
|
|
from typing import Any
|
|
|
|
|
|
2020-12-26 19:11:29 +00:00
|
|
|
from pegen import grammar
|
2021-09-05 14:58:52 +01:00
|
|
|
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
|
2020-12-26 19:11:29 +00:00
|
|
|
|
2021-08-12 17:37:30 +01:00
|
|
|
|
2020-12-26 19:11:29 +00:00
|
|
|
class ValidationError(Exception):
    """Raised by the grammar validators in this module when a check fails."""
|
|
|
|
|
|
2021-08-12 17:37:30 +01:00
|
|
|
|
2020-12-26 19:11:29 +00:00
|
|
|
class GrammarValidator(GrammarVisitor):
    """Base class for the grammar checks defined in this module.

    An instance keeps the grammar it was created for and, while
    ``validate_rule`` is running, the name of the rule being visited.
    ``validate_grammar`` instantiates and runs every direct subclass.
    """

    def __init__(self, grammar: grammar.Grammar) -> None:
        """Remember *grammar*; no rule is being validated yet."""
        # ``rulename`` is only meaningful while validate_rule() is visiting.
        self.rulename: str | None = None
        self.grammar = grammar

    def validate_rule(self, rulename: str, node: Rule) -> None:
        """Visit *node* with ``self.rulename`` set to *rulename*.

        The name is cleared again once the visit returns normally; if the
        visit raises, the exception propagates and the name is left as is.
        """
        self.rulename = rulename
        self.visit(node)
        self.rulename = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SubRuleValidator(GrammarValidator):
    """Reject an alternative whose text starts with an earlier alternative's.

    The comparison is purely textual: it is done on ``str()`` of the two
    alternatives.
    """

    def visit_Rhs(self, node: Rhs) -> None:
        """Compare every alternative of *node* with each one after it."""
        alternatives = node.alts
        for next_position, earlier in enumerate(alternatives, start=1):
            # Only alternatives that come later can be hidden by `earlier`.
            for later in alternatives[next_position:]:
                self.check_intersection(earlier, later)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise ValidationError if *second_alt* starts with *first_alt*.

        Both alternatives are compared through their string forms.
        """
        later_text = str(second_alt)
        earlier_text = str(first_alt)
        if not later_text.startswith(earlier_text):
            return
        raise ValidationError(
            f"In {self.rulename} there is an alternative that will "
            f"never be visited:\n{second_alt}"
        )
|
|
|
|
|
|
2020-12-26 19:11:29 +00:00
|
|
|
|
2024-05-30 09:27:32 +02:00
|
|
|
class RaiseRuleValidator(GrammarValidator):
    """Reject RAISE_SYNTAX_ERROR actions outside rules named ``invalid...``."""

    def visit_Alt(self, node: Alt) -> None:
        """Raise ValidationError if *node*'s action mentions RAISE_SYNTAX_ERROR.

        Alternatives of a rule whose name starts with ``invalid`` are exempt.
        """
        current_rule = self.rulename
        if current_rule and current_rule.startswith('invalid'):
            # raising is allowed in invalid rules
            return

        action = node.action
        if not action or 'RAISE_SYNTAX_ERROR' not in action:
            return

        raise ValidationError(
            f"In {current_rule!r} there is an alternative that contains "
            "RAISE_SYNTAX_ERROR; this is only allowed in invalid_ rules"
        )
|
|
|
|
|
|
|
|
|
|
|
2026-01-13 13:21:59 +01:00
|
|
|
class CutValidator(GrammarValidator):
    """Fail if Cut is not directly in a rule.

    For simplicity, we currently document that a Cut affects alternatives
    of the *rule* it is in.
    However, the implementation makes cuts local to enclosing Rhs
    (e.g. parenthesized list of choices).
    Additionally, in academic papers about PEG, repeats and optional items
    are "desugared" to choices with an empty alternative, and thus contain
    a Cut's effect.

    Please update documentation and tests when adding this cut,
    then get rid of this validator.

    See gh-143054.
    """

    def visit(self, node: Any, parents: tuple[Any, ...] = ()) -> None:
        """Visit *node*, passing down the chain of nodes leading to it.

        *parents* holds the nodes from the outermost one visited down to the
        parent of *node*; *node* itself is appended before dispatching, so a
        ``visit_*`` handler receives a chain that ends with its own node.
        """
        super().visit(node, parents=(*parents, node))

    def visit_Cut(
        self, node: grammar.Cut, parents: tuple[Any, ...] = ()
    ) -> None:
        """Raise ValidationError unless the cut sits at the top level of a rule.

        *parents* is the chain built by ``visit`` and therefore ends with the
        cut itself.  The only accepted chain is
        Rule -> Rhs -> Alt -> NamedItem -> Cut.
        """
        # Compare by class name, outermost node first.
        parent_types = [type(p).__name__ for p in parents]
        if parent_types != ['Rule', 'Rhs', 'Alt', 'NamedItem', 'Cut']:
            raise ValidationError(
                f"Rule {self.rulename!r} contains cut that's not on the "
                "top level. "
                "The intended semantics of such cases need "
                "to be clarified; see the CutValidator docstring."
                f"\nThe cut is inside: {parent_types}"
            )
|
|
|
|
|
|
2021-08-12 17:37:30 +01:00
|
|
|
def validate_grammar(the_grammar: grammar.Grammar) -> None:
    """Run every direct GrammarValidator subclass over all rules of the grammar.

    Each validator class is instantiated with *the_grammar* and then applied
    to every ``(name, rule)`` pair in ``the_grammar.rules`` before the next
    class is instantiated.  Exceptions raised by a validator propagate to
    the caller.
    """
    for checker_type in GrammarValidator.__subclasses__():
        checker = checker_type(the_grammar)
        for name, rule in the_grammar.rules.items():
            checker.validate_rule(name, rule)
|