[3.14] gh-138281: Run ruff on Tools/peg_generator (GH-138282) (#138469)

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Authored by sobolevn on 2025-09-04 14:13:36 +03:00; committed by GitHub
parent 2583646288
commit 8e1c2fe3d3
16 changed files with 251 additions and 237 deletions

.pre-commit-config.yaml

@ -18,6 +18,10 @@ repos:
name: Run Ruff (lint) on Argument Clinic
args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml]
files: ^Tools/clinic/|Lib/test/test_clinic.py
- id: ruff
name: Run Ruff (lint) on Tools/peg_generator/
args: [--exit-non-zero-on-fix, --config=Tools/peg_generator/.ruff.toml]
files: ^Tools/peg_generator/
- id: ruff-format
name: Run Ruff (format) on Doc/
args: [--check]

Tools/peg_generator/.ruff.toml

@ -0,0 +1,27 @@
extend = "../../.ruff.toml" # Inherit the project-wide settings
extend-exclude = [
# Generated files:
"Tools/peg_generator/pegen/grammar_parser.py",
]
[lint]
select = [
"F", # pyflakes
"I", # isort
"UP", # pyupgrade
"RUF100", # Ban unused `# noqa` comments
"PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes)
]
ignore = [
# Use PEP-604 unions rather than tuples for isinstance() checks.
# Makes code slower and more verbose. https://github.com/astral-sh/ruff/issues/7871.
"UP038",
]
unfixable = [
# The autofixes sometimes do the wrong things for these;
# it's better to have to manually look at the code and see how it needs fixing
"F841", # Detects unused variables
"F601", # Detects dictionaries that have duplicate keys
"F602", # Also detects dictionaries that have duplicate keys
]
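
For context, the UP038 ignore keeps tuple-style isinstance() checks rather than the PEP 604 union form the rule would autofix them to. A minimal illustration of the two spellings (not taken from the pegen sources):

# Tuple form, kept because UP038 is ignored:
isinstance(value, (int, float))

# PEP 604 union form that UP038 would rewrite it to
# (more verbose and slower at runtime, per the comment above):
isinstance(value, int | float)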

Tools/peg_generator/pegen/__main__.py

@ -10,7 +10,6 @@
import time
import token
import traceback
from typing import Tuple
from pegen.grammar import Grammar
from pegen.parser import Parser
@ -21,7 +20,7 @@
def generate_c_code(
args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
from pegen.build import build_c_parser_and_generator
verbose = args.verbose
@ -50,7 +49,7 @@ def generate_c_code(
def generate_python_code(
args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
from pegen.build import build_python_parser_and_generator
verbose = args.verbose
@ -188,7 +187,7 @@ def main() -> None:
if __name__ == "__main__":
if sys.version_info < (3, 8):
if sys.version_info < (3, 8): # noqa: UP036
print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
sys.exit(1)
main()
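
The `# noqa: UP036` above deliberately keeps the explicit Python 3.8 check that the pyupgrade rules would otherwise flag as an outdated version block. Most of the Python changes below follow the same mechanical pattern driven by the UP rules: typing.Tuple/Dict/List/Set/Type generics become PEP 585 builtin generics, and typing.Optional becomes a PEP 604 union. A minimal before/after sketch with hypothetical functions, not code from pegen:

# Before: typing generics and Optional
from typing import Dict, List, Optional, Tuple

def group_keywords() -> Dict[int, List[Tuple[str, int]]]: ...
def find_rule(name: Optional[str]) -> Optional[Tuple[str, str]]: ...

# After: builtin generics (PEP 585) and X | None unions (PEP 604)
def group_keywords() -> dict[int, list[tuple[str, int]]]: ...
def find_rule(name: str | None) -> tuple[str, str] | None: ...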

Tools/peg_generator/pegen/ast_dump.py

@ -6,7 +6,7 @@
TODO: Remove the above-described hack.
"""
from typing import Any, Optional, Tuple
from typing import Any
def ast_dump(
@ -14,9 +14,9 @@ def ast_dump(
annotate_fields: bool = True,
include_attributes: bool = False,
*,
indent: Optional[str] = None,
indent: str | None = None,
) -> str:
def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
def _format(node: Any, level: int = 0) -> tuple[str, bool]:
if indent is not None:
level += 1
prefix = "\n" + indent * level
@ -41,7 +41,7 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
value, simple = _format(value, level)
allsimple = allsimple and simple
if keywords:
args.append("%s=%s" % (name, value))
args.append(f"{name}={value}")
else:
args.append(value)
if include_attributes and node._attributes:
@ -54,16 +54,16 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
continue
value, simple = _format(value, level)
allsimple = allsimple and simple
args.append("%s=%s" % (name, value))
args.append(f"{name}={value}")
if allsimple and len(args) <= 3:
return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args
return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False
return "{}({})".format(node.__class__.__name__, ", ".join(args)), not args
return f"{node.__class__.__name__}({prefix}{sep.join(args)})", False
elif isinstance(node, list):
if not node:
return "[]", True
return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False
return f"[{prefix}{sep.join(_format(x, level)[0] for x in node)}]", False
return repr(node), True
if all(cls.__name__ != "AST" for cls in node.__class__.__mro__):
raise TypeError("expected AST, got %r" % node.__class__.__name__)
raise TypeError(f"expected AST, got {node.__class__.__name__!r}")
return _format(node)[0]

Tools/peg_generator/pegen/build.py

@ -6,7 +6,7 @@
import sysconfig
import tempfile
import tokenize
from typing import IO, Any, Dict, List, Optional, Set, Tuple
from typing import IO, Any
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
@ -18,11 +18,11 @@
MOD_DIR = pathlib.Path(__file__).resolve().parent
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
TokenDefinitions = tuple[dict[int, str], dict[str, int], set[str]]
Incomplete = Any # TODO: install `types-setuptools` and remove this alias
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> list[str]:
flags = sysconfig.get_config_var(compiler_flags)
py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
if flags is None or py_flags_nodist is None:
@ -71,11 +71,11 @@ def fixup_build_ext(cmd: Incomplete) -> None:
def compile_c_extension(
generated_source_path: str,
build_dir: Optional[str] = None,
build_dir: str | None = None,
verbose: bool = False,
keep_asserts: bool = True,
disable_optimization: bool = False,
library_dir: Optional[str] = None,
library_dir: str | None = None,
) -> pathlib.Path:
"""Compile the generated source for a parser generator into an extension module.
@ -93,11 +93,10 @@ def compile_c_extension(
"""
import setuptools.command.build_ext
import setuptools.logging
from setuptools import Extension, Distribution
from setuptools.modified import newer_group
from setuptools import Distribution, Extension
from setuptools._distutils.ccompiler import new_compiler
from setuptools._distutils.sysconfig import customize_compiler
from setuptools.modified import newer_group
if verbose:
setuptools.logging.set_threshold(logging.DEBUG)
@ -241,7 +240,7 @@ def compile_c_extension(
def build_parser(
grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
) -> tuple[Grammar, Parser, Tokenizer]:
with open(grammar_file) as file:
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
parser = GrammarParser(tokenizer, verbose=verbose_parser)
@ -292,7 +291,7 @@ def build_c_generator(
keep_asserts_in_extension: bool = True,
skip_actions: bool = False,
) -> ParserGenerator:
with open(tokens_file, "r") as tok_file:
with open(tokens_file) as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
with open(output_file, "w") as file:
gen: ParserGenerator = CParserGenerator(
@ -333,7 +332,7 @@ def build_c_parser_and_generator(
verbose_c_extension: bool = False,
keep_asserts_in_extension: bool = True,
skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, C parser, tokenizer, parser generator for a given grammar
Args:
@ -373,7 +372,7 @@ def build_python_parser_and_generator(
verbose_tokenizer: bool = False,
verbose_parser: bool = False,
skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, python parser, tokenizer, parser generator for a given grammar
Args:

Tools/peg_generator/pegen/c_generator.py

@ -1,9 +1,10 @@
import ast
import os.path
import re
from collections.abc import Callable
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Callable, Dict, List, Optional, Set, Text, Tuple
from typing import IO, Any
from pegen import grammar
from pegen.grammar import (
@ -86,13 +87,13 @@ class NodeTypes(Enum):
@dataclass
class FunctionCall:
function: str
arguments: List[Any] = field(default_factory=list)
assigned_variable: Optional[str] = None
assigned_variable_type: Optional[str] = None
return_type: Optional[str] = None
nodetype: Optional[NodeTypes] = None
arguments: list[Any] = field(default_factory=list)
assigned_variable: str | None = None
assigned_variable_type: str | None = None
return_type: str | None = None
nodetype: NodeTypes | None = None
force_true: bool = False
comment: Optional[str] = None
comment: str | None = None
def __str__(self) -> str:
parts = []
@ -124,14 +125,14 @@ class CCallMakerVisitor(GrammarVisitor):
def __init__(
self,
parser_generator: ParserGenerator,
exact_tokens: Dict[str, int],
non_exact_tokens: Set[str],
exact_tokens: dict[str, int],
non_exact_tokens: set[str],
):
self.gen = parser_generator
self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens
self.cache: Dict[str, str] = {}
self.cleanup_statements: List[str] = []
self.cache: dict[str, str] = {}
self.cleanup_statements: list[str] = []
def keyword_helper(self, keyword: str) -> FunctionCall:
return FunctionCall(
@ -167,7 +168,7 @@ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
)
return FunctionCall(
assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_expect_token",
function="_PyPegen_expect_token",
arguments=["p", name],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
@ -199,7 +200,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_token",
function="_PyPegen_expect_token",
arguments=["p", type],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
@ -271,7 +272,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall:
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_forced_token",
function="_PyPegen_expect_forced_token",
arguments=["p", type, f'"{val}"'],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
@ -283,7 +284,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall:
call.comment = None
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_forced_result",
function="_PyPegen_expect_forced_result",
arguments=["p", str(call), f'"{node.node.rhs!s}"'],
return_type="void *",
comment=f"forced_token=({node.node.rhs!s})",
@ -306,7 +307,7 @@ def _generate_artificial_rule_call(
node: Any,
prefix: str,
rule_generation_func: Callable[[], str],
return_type: Optional[str] = None,
return_type: str | None = None,
) -> FunctionCall:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
@ -377,10 +378,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
tokens: Dict[int, str],
exact_tokens: Dict[str, int],
non_exact_tokens: Set[str],
file: Optional[IO[Text]],
tokens: dict[int, str],
exact_tokens: dict[str, int],
non_exact_tokens: set[str],
file: IO[str] | None,
debug: bool = False,
skip_actions: bool = False,
):
@ -391,7 +392,7 @@ def __init__(
self._varname_counter = 0
self.debug = debug
self.skip_actions = skip_actions
self.cleanup_statements: List[str] = []
self.cleanup_statements: list[str] = []
def add_level(self) -> None:
self.print("if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) {")
@ -427,12 +428,12 @@ def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
self.print(f"if ({error_var}) {{")
with self.indent():
self.print(f"goto {goto_target};")
self.print(f"}}")
self.print("}")
def out_of_memory_return(
self,
expr: str,
cleanup_code: Optional[str] = None,
cleanup_code: str | None = None,
) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
@ -441,14 +442,14 @@ def out_of_memory_return(
self.print("p->error_indicator = 1;")
self.print("PyErr_NoMemory();")
self.add_return("NULL")
self.print(f"}}")
self.print("}")
def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
self.print("PyErr_NoMemory();")
self.print(f"goto {goto_target};")
self.print(f"}}")
self.print("}")
def generate(self, filename: str) -> None:
self.collect_rules()
@ -491,8 +492,8 @@ def generate(self, filename: str) -> None:
if trailer:
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
groups: Dict[int, List[Tuple[str, int]]] = {}
def _group_keywords_by_length(self) -> dict[int, list[tuple[str, int]]]:
groups: dict[int, list[tuple[str, int]]] = {}
for keyword_str, keyword_type in self.keywords.items():
length = len(keyword_str)
if length in groups:
@ -584,10 +585,10 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
self.print("if (_raw == NULL || p->mark <= _resmark)")
with self.indent():
self.print("break;")
self.print(f"_resmark = p->mark;")
self.print("_resmark = p->mark;")
self.print("_res = _raw;")
self.print("}")
self.print(f"p->mark = _resmark;")
self.print("p->mark = _resmark;")
self.add_return("_res")
self.print("}")
self.print(f"static {result_type}")
@ -643,7 +644,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
if memoize:
self.print("int _start_mark = p->mark;")
self.print("void **_children = PyMem_Malloc(sizeof(void *));")
self.out_of_memory_return(f"!_children")
self.out_of_memory_return("!_children")
self.print("Py_ssize_t _children_capacity = 1;")
self.print("Py_ssize_t _n = 0;")
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
@ -661,7 +662,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
self.add_return("NULL")
self.print("}")
self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
self.print("for (Py_ssize_t i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
self.print("PyMem_Free(_children);")
if memoize and node.name:
@ -715,7 +716,7 @@ def visit_NamedItem(self, node: NamedItem) -> None:
self.print(call)
def visit_Rhs(
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: str | None
) -> None:
if is_loop:
assert len(node.alts) == 1
@ -734,7 +735,7 @@ def join_conditions(self, keyword: str, node: Any) -> None:
self.visit(item)
self.print(")")
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
def emit_action(self, node: Alt, cleanup_code: str | None = None) -> None:
self.print(f"_res = {node.action};")
self.print("if (_res == NULL && PyErr_Occurred()) {")
@ -776,7 +777,7 @@ def emit_default_action(self, is_gather: bool, node: Alt) -> None:
def emit_dummy_action(self) -> None:
self.print("_res = _PyPegen_dummy_name(p);")
def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
self.join_conditions(keyword="if", node=node)
self.print("{")
# We have parsed successfully all the conditions for the option.
@ -796,10 +797,10 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str])
self.emit_default_action(is_gather, node)
# As the current option has parsed correctly, do not continue with the rest.
self.print(f"goto done;")
self.print("goto done;")
self.print("}")
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
# Condition of the main body of the alternative
self.join_conditions(keyword="while", node=node)
self.print("{")
@ -823,7 +824,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -
self.print(
"void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
)
self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
self.out_of_memory_return("!_new_children", cleanup_code="PyMem_Free(_children);")
self.print("_children = _new_children;")
self.print("}")
self.print("_children[_n++] = _res;")
@ -831,7 +832,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -
self.print("}")
def visit_Alt(
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
self, node: Alt, is_loop: bool, is_gather: bool, rulename: str | None
) -> None:
if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
self.print(f"if (p->call_invalid_rules) {{ // {node}")
@ -875,7 +876,7 @@ def visit_Alt(
self.print("}")
self.print("}")
def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
def collect_vars(self, node: Alt) -> dict[str | None, str | None]:
types = {}
with self.local_variable_context():
for item in node.items:
@ -883,7 +884,7 @@ def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
types[name] = type
return types
def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
def add_var(self, node: NamedItem) -> tuple[str | None, str | None]:
call = self.callmakervisitor.generate_call(node.item)
name = node.name if node.name else call.assigned_variable
if name is not None:

Tools/peg_generator/pegen/first_sets.py

@ -3,7 +3,6 @@
import argparse
import pprint
import sys
from typing import Dict, Set
from pegen.build import build_parser
from pegen.grammar import (
@ -33,20 +32,20 @@
class FirstSetCalculator(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
self.first_sets: Dict[str, Set[str]] = dict()
self.in_process: Set[str] = set()
self.first_sets: dict[str, set[str]] = dict()
self.in_process: set[str] = set()
def calculate(self) -> Dict[str, Set[str]]:
def calculate(self) -> dict[str, set[str]]:
for name, rule in self.rules.items():
self.visit(rule)
return self.first_sets
def visit_Alt(self, item: Alt) -> Set[str]:
result: Set[str] = set()
to_remove: Set[str] = set()
def visit_Alt(self, item: Alt) -> set[str]:
result: set[str] = set()
to_remove: set[str] = set()
for other in item.items:
new_terminals = self.visit(other)
if isinstance(other.item, NegativeLookahead):
@ -71,34 +70,34 @@ def visit_Alt(self, item: Alt) -> Set[str]:
return result
def visit_Cut(self, item: Cut) -> Set[str]:
def visit_Cut(self, item: Cut) -> set[str]:
return set()
def visit_Group(self, item: Group) -> Set[str]:
def visit_Group(self, item: Group) -> set[str]:
return self.visit(item.rhs)
def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
def visit_PositiveLookahead(self, item: Lookahead) -> set[str]:
return self.visit(item.node)
def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
def visit_NegativeLookahead(self, item: NegativeLookahead) -> set[str]:
return self.visit(item.node)
def visit_NamedItem(self, item: NamedItem) -> Set[str]:
def visit_NamedItem(self, item: NamedItem) -> set[str]:
return self.visit(item.item)
def visit_Opt(self, item: Opt) -> Set[str]:
def visit_Opt(self, item: Opt) -> set[str]:
return self.visit(item.node)
def visit_Gather(self, item: Gather) -> Set[str]:
def visit_Gather(self, item: Gather) -> set[str]:
return self.visit(item.node)
def visit_Repeat0(self, item: Repeat0) -> Set[str]:
def visit_Repeat0(self, item: Repeat0) -> set[str]:
return self.visit(item.node)
def visit_Repeat1(self, item: Repeat1) -> Set[str]:
def visit_Repeat1(self, item: Repeat1) -> set[str]:
return self.visit(item.node)
def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
def visit_NameLeaf(self, item: NameLeaf) -> set[str]:
if item.value not in self.rules:
return {item.value}
@ -110,16 +109,16 @@ def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
return self.first_sets[item.value]
def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
def visit_StringLeaf(self, item: StringLeaf) -> set[str]:
return {item.value}
def visit_Rhs(self, item: Rhs) -> Set[str]:
result: Set[str] = set()
def visit_Rhs(self, item: Rhs) -> set[str]:
result: set[str] = set()
for alt in item.alts:
result |= self.visit(alt)
return result
def visit_Rule(self, item: Rule) -> Set[str]:
def visit_Rule(self, item: Rule) -> set[str]:
if item.name in self.in_process:
return set()
elif item.name not in self.first_sets:
@ -138,7 +137,7 @@ def main() -> None:
try:
grammar, parser, tokenizer = build_parser(args.grammar_file)
except Exception as err:
print("ERROR: Failed to parse grammar file", file=sys.stderr)
print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
sys.exit(1)
firs_sets = FirstSetCalculator(grammar.rules).calculate()

Tools/peg_generator/pegen/grammar.py

@ -1,15 +1,7 @@
from __future__ import annotations
from typing import (
AbstractSet,
Any,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
)
from collections.abc import Iterable, Iterator, Set
from typing import Any
class GrammarError(Exception):
@ -34,7 +26,7 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any:
class Grammar:
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
def __init__(self, rules: Iterable[Rule], metas: Iterable[tuple[str, str | None]]):
# Check if there are repeated rules in "rules"
all_rules = {}
for rule in rules:
@ -66,7 +58,7 @@ def __iter__(self) -> Iterator[Rule]:
class Rule:
def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
self.name = name
self.type = type
self.rhs = rhs
@ -141,9 +133,9 @@ def __repr__(self) -> str:
class Rhs:
def __init__(self, alts: List[Alt]):
def __init__(self, alts: list[Alt]):
self.alts = alts
self.memo: Optional[Tuple[Optional[str], str]] = None
self.memo: tuple[str | None, str] | None = None
def __str__(self) -> str:
return " | ".join(str(alt) for alt in self.alts)
@ -151,7 +143,7 @@ def __str__(self) -> str:
def __repr__(self) -> str:
return f"Rhs({self.alts!r})"
def __iter__(self) -> Iterator[List[Alt]]:
def __iter__(self) -> Iterator[list[Alt]]:
yield self.alts
@property
@ -165,7 +157,7 @@ def can_be_inlined(self) -> bool:
class Alt:
def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
def __init__(self, items: list[NamedItem], *, icut: int = -1, action: str | None = None):
self.items = items
self.icut = icut
self.action = action
@ -185,12 +177,12 @@ def __repr__(self) -> str:
args.append(f"action={self.action!r}")
return f"Alt({', '.join(args)})"
def __iter__(self) -> Iterator[List[NamedItem]]:
def __iter__(self) -> Iterator[list[NamedItem]]:
yield self.items
class NamedItem:
def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
def __init__(self, name: str | None, item: Item, type: str | None = None):
self.name = name
self.item = item
self.type = type
@ -271,7 +263,7 @@ class Repeat:
def __init__(self, node: Plain):
self.node = node
self.memo: Optional[Tuple[Optional[str], str]] = None
self.memo: tuple[str | None, str] | None = None
def __iter__(self) -> Iterator[Plain]:
yield self.node
@ -334,12 +326,12 @@ def __init__(self) -> None:
pass
def __repr__(self) -> str:
return f"Cut()"
return "Cut()"
def __str__(self) -> str:
return f"~"
return "~"
def __iter__(self) -> Iterator[Tuple[str, str]]:
def __iter__(self) -> Iterator[tuple[str, str]]:
yield from ()
def __eq__(self, other: object) -> bool:
@ -347,15 +339,15 @@ def __eq__(self, other: object) -> bool:
return NotImplemented
return True
def initial_names(self) -> AbstractSet[str]:
def initial_names(self) -> Set[str]:
return set()
Plain = Union[Leaf, Group]
Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
RuleName = Tuple[str, Optional[str]]
MetaTuple = Tuple[str, Optional[str]]
MetaList = List[MetaTuple]
RuleList = List[Rule]
NamedItemList = List[NamedItem]
LookaheadOrCut = Union[Lookahead, Cut]
Plain = Leaf | Group
Item = Plain | Opt | Repeat | Forced | Lookahead | Rhs | Cut
RuleName = tuple[str, str | None]
MetaTuple = tuple[str, str | None]
MetaList = list[MetaTuple]
RuleList = list[Rule]
NamedItemList = list[NamedItem]
LookaheadOrCut = Lookahead | Cut
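
Two details here are easy to misread: Set now comes from collections.abc (the read-only set ABC previously spelled typing.AbstractSet, not the builtin set), and module-level aliases such as Plain = Leaf | Group are evaluated at import time, so they build a runtime types.UnionType rather than a string annotation. A small sketch of the same pattern, with placeholder class bodies:

from collections.abc import Set

class Leaf: ...
class Group: ...

# Runtime alias built with the | operator on classes (types.UnionType, 3.10+):
Plain = Leaf | Group

def initial_names() -> Set[str]:
    # collections.abc.Set matches any read-only set-like value,
    # e.g. a frozenset or a dict keys view, not only the builtin set.
    return frozenset()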

Tools/peg_generator/pegen/grammar_visualizer.py

@ -1,6 +1,7 @@
import argparse
import sys
from typing import Any, Callable, Iterator
from collections.abc import Callable, Iterator
from typing import Any
from pegen.build import build_parser
from pegen.grammar import Grammar, Rule
@ -52,7 +53,7 @@ def main() -> None:
try:
grammar, parser, tokenizer = build_parser(args.filename)
except Exception as err:
print("ERROR: Failed to parse grammar file", file=sys.stderr)
print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
sys.exit(1)
visitor = ASTGrammarPrinter()

Tools/peg_generator/pegen/parser.py

@ -5,7 +5,8 @@
import tokenize
import traceback
from abc import abstractmethod
from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
from collections.abc import Callable
from typing import Any, ClassVar, TypeVar, cast
from pegen.tokenizer import Mark, Tokenizer, exact_token_types
@ -74,12 +75,12 @@ def memoize_wrapper(self: "Parser", *args: object) -> Any:
def memoize_left_rec(
method: Callable[["Parser"], Optional[T]]
) -> Callable[["Parser"], Optional[T]]:
method: Callable[["Parser"], T | None]
) -> Callable[["Parser"], T | None]:
"""Memoize a left-recursive symbol method."""
method_name = method.__name__
def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]:
def memoize_left_rec_wrapper(self: "Parser") -> T | None:
mark = self._mark()
key = mark, method_name, ()
# Fast path: cache hit, and not verbose.
@ -160,15 +161,15 @@ def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]:
class Parser:
"""Parsing base class."""
KEYWORDS: ClassVar[Tuple[str, ...]]
KEYWORDS: ClassVar[tuple[str, ...]]
SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
SOFT_KEYWORDS: ClassVar[tuple[str, ...]]
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
self._tokenizer = tokenizer
self._verbose = verbose
self._level = 0
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
self._cache: dict[tuple[Mark, str, tuple[Any, ...]], tuple[Any, Mark]] = {}
# Integer tracking whether we are in a left recursive rule or not. Can be useful
# for error reporting.
self.in_recursive_rule = 0
@ -185,28 +186,28 @@ def showpeek(self) -> str:
return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
@memoize
def name(self) -> Optional[tokenize.TokenInfo]:
def name(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string not in self.KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
def number(self) -> Optional[tokenize.TokenInfo]:
def number(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NUMBER:
return self._tokenizer.getnext()
return None
@memoize
def string(self) -> Optional[tokenize.TokenInfo]:
def string(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.STRING:
return self._tokenizer.getnext()
return None
@memoize
def fstring_start(self) -> Optional[tokenize.TokenInfo]:
def fstring_start(self) -> tokenize.TokenInfo | None:
FSTRING_START = getattr(token, "FSTRING_START", None)
if not FSTRING_START:
return None
@ -216,7 +217,7 @@ def fstring_start(self) -> Optional[tokenize.TokenInfo]:
return None
@memoize
def fstring_middle(self) -> Optional[tokenize.TokenInfo]:
def fstring_middle(self) -> tokenize.TokenInfo | None:
FSTRING_MIDDLE = getattr(token, "FSTRING_MIDDLE", None)
if not FSTRING_MIDDLE:
return None
@ -226,7 +227,7 @@ def fstring_middle(self) -> Optional[tokenize.TokenInfo]:
return None
@memoize
def fstring_end(self) -> Optional[tokenize.TokenInfo]:
def fstring_end(self) -> tokenize.TokenInfo | None:
FSTRING_END = getattr(token, "FSTRING_END", None)
if not FSTRING_END:
return None
@ -236,28 +237,28 @@ def fstring_end(self) -> Optional[tokenize.TokenInfo]:
return None
@memoize
def op(self) -> Optional[tokenize.TokenInfo]:
def op(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.OP:
return self._tokenizer.getnext()
return None
@memoize
def type_comment(self) -> Optional[tokenize.TokenInfo]:
def type_comment(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.TYPE_COMMENT:
return self._tokenizer.getnext()
return None
@memoize
def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
def soft_keyword(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
def expect(self, type: str) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.string == type:
return self._tokenizer.getnext()
@ -271,7 +272,7 @@ def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
return self._tokenizer.getnext()
return None
def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
def expect_forced(self, res: Any, expectation: str) -> tokenize.TokenInfo | None:
if res is None:
raise self.make_syntax_error(f"expected {expectation}")
return res
@ -293,7 +294,7 @@ def make_syntax_error(self, message: str, filename: str = "<unknown>") -> Syntax
return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
def simple_parser_main(parser_class: Type[Parser]) -> None:
def simple_parser_main(parser_class: type[Parser]) -> None:
argparser = argparse.ArgumentParser()
argparser.add_argument(
"-v",
@ -330,7 +331,7 @@ def simple_parser_main(parser_class: Type[Parser]) -> None:
endpos = 0
else:
endpos = file.tell()
except IOError:
except OSError:
endpos = 0
finally:
if file is not sys.stdin:

Tools/peg_generator/pegen/parser_generator.py

@ -1,22 +1,10 @@
import sys
import ast
import contextlib
import re
import sys
from abc import abstractmethod
from typing import (
IO,
AbstractSet,
Any,
Dict,
Iterable,
Iterator,
List,
Optional,
Set,
Text,
Tuple,
Union,
)
from collections.abc import Iterable, Iterator, Set
from typing import IO, Any
from pegen import sccutils
from pegen.grammar import (
@ -44,7 +32,7 @@
class RuleCollectorVisitor(GrammarVisitor):
"""Visitor that invokes a provided callmaker visitor with just the NamedItem nodes"""
def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
def __init__(self, rules: dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
self.rules = rules
self.callmaker = callmakervisitor
@ -58,7 +46,7 @@ def visit_NamedItem(self, item: NamedItem) -> None:
class KeywordCollectorVisitor(GrammarVisitor):
"""Visitor that collects all the keywords and soft keywords in the Grammar"""
def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
def __init__(self, gen: "ParserGenerator", keywords: dict[str, int], soft_keywords: set[str]):
self.generator = gen
self.keywords = keywords
self.soft_keywords = soft_keywords
@ -73,7 +61,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> None:
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
def __init__(self, rules: dict[str, Rule], tokens: set[str]):
self.rules = rules
self.tokens = tokens
# If python < 3.12 add the virtual fstring tokens
@ -100,11 +88,11 @@ def visit_NamedItem(self, node: NamedItem) -> None:
class ParserGenerator:
callmakervisitor: GrammarVisitor
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
def __init__(self, grammar: Grammar, tokens: set[str], file: IO[str] | None):
self.grammar = grammar
self.tokens = tokens
self.keywords: Dict[str, int] = {}
self.soft_keywords: Set[str] = set()
self.keywords: dict[str, int] = {}
self.soft_keywords: set[str] = set()
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
@ -117,8 +105,8 @@ def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]])
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
self.all_rules: dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: list[list[str]] = []
def validate_rule_names(self) -> None:
for rule in self.rules:
@ -132,7 +120,7 @@ def local_variable_context(self) -> Iterator[None]:
self._local_variable_stack.pop()
@property
def local_variable_names(self) -> List[str]:
def local_variable_names(self) -> list[str]:
return self._local_variable_stack[-1]
@abstractmethod
@ -164,7 +152,7 @@ def collect_rules(self) -> None:
keyword_collector.visit(rule)
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
done: Set[str] = set()
done: set[str] = set()
while True:
computed_rules = list(self.all_rules)
todo = [i for i in computed_rules if i not in done]
@ -229,10 +217,10 @@ def dedupe(self, name: str) -> str:
class NullableVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.visited: Set[Any] = set()
self.nullables: Set[Union[Rule, NamedItem]] = set()
self.visited: set[Any] = set()
self.nullables: set[Rule | NamedItem] = set()
def visit_Rule(self, rule: Rule) -> bool:
if rule in self.visited:
@ -294,7 +282,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> bool:
return not node.value
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
def compute_nullables(rules: dict[str, Rule]) -> set[Any]:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
@ -306,12 +294,12 @@ def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
class InitialNamesVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
names: Set[str] = set()
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> set[Any]:
names: set[str] = set()
for value in node:
if isinstance(value, list):
for item in value:
@ -320,33 +308,33 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[A
names |= self.visit(value, *args, **kwargs)
return names
def visit_Alt(self, alt: Alt) -> Set[Any]:
names: Set[str] = set()
def visit_Alt(self, alt: Alt) -> set[Any]:
names: set[str] = set()
for item in alt.items:
names |= self.visit(item)
if item not in self.nullables:
break
return names
def visit_Forced(self, force: Forced) -> Set[Any]:
def visit_Forced(self, force: Forced) -> set[Any]:
return set()
def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
def visit_LookAhead(self, lookahead: Lookahead) -> set[Any]:
return set()
def visit_Cut(self, cut: Cut) -> Set[Any]:
def visit_Cut(self, cut: Cut) -> set[Any]:
return set()
def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
def visit_NameLeaf(self, node: NameLeaf) -> set[Any]:
return {node.value}
def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
def visit_StringLeaf(self, node: StringLeaf) -> set[Any]:
return set()
def compute_left_recursives(
rules: Dict[str, Rule]
) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
rules: dict[str, Rule]
) -> tuple[dict[str, Set[str]], list[Set[str]]]:
graph = make_first_graph(rules)
sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
for scc in sccs:
@ -374,7 +362,7 @@ def compute_left_recursives(
return graph, sccs
def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
def make_first_graph(rules: dict[str, Rule]) -> dict[str, Set[str]]:
"""Compute the graph of left-invocations.
There's an edge from A to B if A may invoke B at its initial
@ -384,7 +372,7 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
"""
initial_name_visitor = InitialNamesVisitor(rules)
graph = {}
vertices: Set[str] = set()
vertices: set[str] = set()
for rulename, rhs in rules.items():
graph[rulename] = names = initial_name_visitor.visit(rhs)
vertices |= names

Tools/peg_generator/pegen/python_generator.py

@ -1,6 +1,7 @@
import os.path
import token
from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple
from collections.abc import Callable, Sequence
from typing import IO, Any
from pegen import grammar
from pegen.grammar import (
@ -74,10 +75,10 @@ def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
def visit_Opt(self, node: Opt) -> bool:
return self.visit(node.node)
def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
def visit_Repeat(self, node: Repeat0) -> tuple[str, str]:
return self.visit(node.node)
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self.visit(node.node)
def visit_Group(self, node: Group) -> bool:
@ -93,9 +94,9 @@ def visit_Forced(self, node: Forced) -> bool:
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[str, Tuple[str, str]] = {}
self.cache: dict[str, tuple[str, str]] = {}
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
def visit_NameLeaf(self, node: NameLeaf) -> tuple[str | None, str]:
name = node.value
if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()"
@ -108,31 +109,31 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
def visit_StringLeaf(self, node: StringLeaf) -> tuple[str, str]:
return "literal", f"self.expect({node.value})"
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
def visit_NamedItem(self, node: NamedItem) -> tuple[str | None, str]:
name, call = self.visit(node.item)
if node.name:
name = node.name
return name, call
def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
def lookahead_call_helper(self, node: Lookahead) -> tuple[str, str]:
name, call = self.visit(node.node)
head, tail = call.split("(", 1)
assert tail[-1] == ")"
tail = tail[:-1]
return head, tail
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
def visit_PositiveLookahead(self, node: PositiveLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node)
return None, f"self.positive_lookahead({head}, {tail})"
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
def visit_NegativeLookahead(self, node: NegativeLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node)
return None, f"self.negative_lookahead({head}, {tail})"
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
def visit_Opt(self, node: Opt) -> tuple[str, str]:
name, call = self.visit(node.node)
# Note trailing comma (the call may already have one comma
# at the end, for example when rules have both repeat0 and optional
@ -148,7 +149,7 @@ def _generate_artificial_rule_call(
prefix: str,
call_by_name_func: Callable[[str], str],
rule_generation_func: Callable[[], str],
) -> Tuple[str, str]:
) -> tuple[str, str]:
node_str = f"{node}"
key = f"{prefix}_{node_str}"
if key in self.cache:
@ -159,7 +160,7 @@ def _generate_artificial_rule_call(
self.cache[key] = name, call
return self.cache[key]
def visit_Rhs(self, node: Rhs) -> Tuple[str, str]:
def visit_Rhs(self, node: Rhs) -> tuple[str, str]:
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
return self.visit(node.alts[0].items[0])
@ -170,7 +171,7 @@ def visit_Rhs(self, node: Rhs) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_rhs(node),
)
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
def visit_Repeat0(self, node: Repeat0) -> tuple[str, str]:
return self._generate_artificial_rule_call(
node,
"repeat0",
@ -178,7 +179,7 @@ def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False),
)
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
def visit_Repeat1(self, node: Repeat1) -> tuple[str, str]:
return self._generate_artificial_rule_call(
node,
"repeat1",
@ -186,7 +187,7 @@ def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True),
)
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self._generate_artificial_rule_call(
node,
"gather",
@ -194,13 +195,13 @@ def visit_Gather(self, node: Gather) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_gather(node),
)
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
def visit_Group(self, node: Group) -> tuple[str | None, str]:
return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
def visit_Cut(self, node: Cut) -> tuple[str, str]:
return "cut", "True"
def visit_Forced(self, node: Forced) -> Tuple[str, str]:
def visit_Forced(self, node: Forced) -> tuple[str, str]:
if isinstance(node.node, Group):
_, val = self.visit(node.node.rhs)
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
@ -215,10 +216,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
file: Optional[IO[Text]],
tokens: Set[str] = set(token.tok_name.values()),
location_formatting: Optional[str] = None,
unreachable_formatting: Optional[str] = None,
file: IO[str] | None,
tokens: set[str] = set(token.tok_name.values()),
location_formatting: str | None = None,
unreachable_formatting: str | None = None,
):
tokens.add("SOFT_KEYWORD")
super().__init__(grammar, tokens, file)
@ -355,7 +356,7 @@ def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
if is_loop:
self.print(f"children.append({action})")
self.print(f"mark = self._mark()")
self.print("mark = self._mark()")
else:
if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting)

Tools/peg_generator/pegen/sccutils.py

@ -1,11 +1,11 @@
# Adapted from mypy (mypy/build.py) under the MIT license.
from typing import *
from collections.abc import Iterable, Iterator, Set
def strongly_connected_components(
vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]]
) -> Iterator[AbstractSet[str]]:
vertices: Set[str], edges: dict[str, Set[str]]
) -> Iterator[Set[str]]:
"""Compute Strongly Connected Components of a directed graph.
Args:
@ -20,12 +20,12 @@ def strongly_connected_components(
From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/.
"""
identified: Set[str] = set()
stack: List[str] = []
index: Dict[str, int] = {}
boundaries: List[int] = []
identified: set[str] = set()
stack: list[str] = []
index: dict[str, int] = {}
boundaries: list[int] = []
def dfs(v: str) -> Iterator[Set[str]]:
def dfs(v: str) -> Iterator[set[str]]:
index[v] = len(stack)
stack.append(v)
boundaries.append(index[v])
@ -50,8 +50,8 @@ def dfs(v: str) -> Iterator[Set[str]]:
def topsort(
data: Dict[AbstractSet[str], Set[AbstractSet[str]]]
) -> Iterable[AbstractSet[AbstractSet[str]]]:
data: dict[Set[str], set[Set[str]]]
) -> Iterable[Set[Set[str]]]:
"""Topological sort.
Args:
@ -94,12 +94,12 @@ def topsort(
break
yield ready
data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
assert not data, "A cyclic dependency exists amongst %r" % data
assert not data, f"A cyclic dependency exists amongst {data}"
def find_cycles_in_scc(
graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str
) -> Iterable[List[str]]:
graph: dict[str, Set[str]], scc: Set[str], start: str
) -> Iterable[list[str]]:
"""Find cycles in SCC emanating from start.
Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
@ -117,7 +117,7 @@ def find_cycles_in_scc(
assert start in graph
# Recursive helper that yields cycles.
def dfs(node: str, path: List[str]) -> Iterator[List[str]]:
def dfs(node: str, path: list[str]) -> Iterator[list[str]]:
if node in path:
yield path + [node]
return
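
The typing changes in this file leave the algorithms untouched. As a rough usage sketch, assuming Tools/peg_generator is on sys.path so that pegen.sccutils is importable:

from pegen.sccutils import strongly_connected_components

# Toy left-invocation graph: rule name -> rules it may invoke first.
edges = {"start": {"expr"}, "expr": {"expr", "term"}, "term": set()}

for scc in strongly_connected_components(edges.keys(), edges):
    print(sorted(scc))  # each SCC is yielded as a set of vertex names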

Tools/peg_generator/pegen/testutil.py

@ -6,7 +6,7 @@
import textwrap
import token
import tokenize
from typing import IO, Any, Dict, Final, Optional, Type, cast
from typing import IO, Any, Final, cast
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
@ -23,19 +23,19 @@
}
def generate_parser(grammar: Grammar) -> Type[Parser]:
def generate_parser(grammar: Grammar) -> type[Parser]:
# Generate a parser.
out = io.StringIO()
genr = PythonParserGenerator(grammar, out)
genr.generate("<string>")
# Load the generated parser class.
ns: Dict[str, Any] = {}
ns: dict[str, Any] = {}
exec(out.getvalue(), ns)
return ns["GeneratedParser"]
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
def run_parser(file: IO[bytes], parser_class: type[Parser], *, verbose: bool = False) -> Any:
# Run a parser on a file (stream).
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515
parser = parser_class(tokenizer, verbose=verbose)
@ -46,7 +46,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
def parse_string(
source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
source: str, parser_class: type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
# Run the parser on a string.
if dedent:
@ -55,7 +55,7 @@ def parse_string(
return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515
def make_parser(source: str) -> Type[Parser]:
def make_parser(source: str) -> type[Parser]:
# Combine parse_string() and generate_parser().
grammar = parse_string(source, GrammarParser)
return generate_parser(grammar)
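
make_parser() chains parse_string() (run against the metagrammar's GrammarParser) and generate_parser(). A rough usage sketch, assuming Tools/peg_generator is on sys.path and using a toy grammar written in pegen's metagrammar syntax:

from pegen.testutil import make_parser, parse_string

toy_grammar = """
start: expr NEWLINE
expr: NUMBER
"""
parser_class = make_parser(toy_grammar)    # generated parser class, built in memory
tree = parse_string("42\n", parser_class)  # parse a string with it
print(tree)
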
@ -86,7 +86,7 @@ def generate_parser_c_extension(
grammar: Grammar,
path: pathlib.PurePath,
debug: bool = False,
library_dir: Optional[str] = None,
library_dir: str | None = None,
) -> Any:
"""Generate a parser c extension for the given grammar in the given path

Tools/peg_generator/pegen/tokenizer.py

@ -1,6 +1,6 @@
import token
import tokenize
from typing import Dict, Iterator, List
from collections.abc import Iterator
Mark = int # NewType('Mark', int)
@ -8,7 +8,11 @@
def shorttok(tok: tokenize.TokenInfo) -> str:
return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
formatted = (
f"{tok.start[0]}.{tok.start[1]}: "
f"{token.tok_name[tok.type]}:{tok.string!r}"
)
return f"{formatted:<25.25}"
class Tokenizer:
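
The rewritten shorttok() preserves the old %-formatting semantics: a "<25.25" format spec left-aligns to a width of 25 and truncates to 25 characters, exactly like "%-25.25s". A quick check of the equivalence:

s = "1.0: NAME:'tokenizer'"
assert "%-25.25s" % s == f"{s:<25.25}"  # both pad or truncate to 25 characters
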
@ -17,7 +21,7 @@ class Tokenizer:
This is pretty tied to Python's syntax.
"""
_tokens: List[tokenize.TokenInfo]
_tokens: list[tokenize.TokenInfo]
def __init__(
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
@ -26,7 +30,7 @@ def __init__(
self._tokens = []
self._index = 0
self._verbose = verbose
self._lines: Dict[int, str] = {}
self._lines: dict[int, str] = {}
self._path = path
if verbose:
self.report(False, False)
@ -72,7 +76,7 @@ def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
break
return tok
def get_lines(self, line_numbers: List[int]) -> List[str]:
def get_lines(self, line_numbers: list[int]) -> list[str]:
"""Retrieve source lines corresponding to line numbers."""
if self._lines:
lines = self._lines

Tools/peg_generator/pegen/validator.py

@ -1,5 +1,3 @@
from typing import Optional
from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
@ -11,7 +9,7 @@ class ValidationError(Exception):
class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar
self.rulename: Optional[str] = None
self.rulename: str | None = None
def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename