[3.14] gh-138281: Run ruff on Tools/peg_generator (GH-138282) (#138469)

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
sobolevn 2025-09-04 14:13:36 +03:00 committed by GitHub
parent 2583646288
commit 8e1c2fe3d3
16 changed files with 251 additions and 237 deletions

.pre-commit-config.yaml

@@ -18,6 +18,10 @@ repos:
         name: Run Ruff (lint) on Argument Clinic
         args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml]
         files: ^Tools/clinic/|Lib/test/test_clinic.py
+      - id: ruff
+        name: Run Ruff (lint) on Tools/peg_generator/
+        args: [--exit-non-zero-on-fix, --config=Tools/peg_generator/.ruff.toml]
+        files: ^Tools/peg_generator/
       - id: ruff-format
         name: Run Ruff (format) on Doc/
         args: [--check]

Tools/peg_generator/.ruff.toml

@@ -0,0 +1,27 @@
+extend = "../../.ruff.toml"  # Inherit the project-wide settings
+
+extend-exclude = [
+    # Generated files:
+    "Tools/peg_generator/pegen/grammar_parser.py",
+]
+
+[lint]
+select = [
+    "F",       # pyflakes
+    "I",       # isort
+    "UP",      # pyupgrade
+    "RUF100",  # Ban unused `# noqa` comments
+    "PGH004",  # Ban blanket `# noqa` comments (only ignore specific error codes)
+]
+ignore = [
+    # Use PEP-604 unions rather than tuples for isinstance() checks.
+    # Makes code slower and more verbose. https://github.com/astral-sh/ruff/issues/7871.
+    "UP038",
+]
+unfixable = [
+    # The autofixes sometimes do the wrong things for these;
+    # it's better to have to manually look at the code and see how it needs fixing
+    "F841",  # Detects unused variables
+    "F601",  # Detects dictionaries that have duplicate keys
+    "F602",  # Also detects dictionaries that have duplicate keys
+]
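A note on the UP038 ignore, since the config comment is terse: that rule rewrites tuple arguments to isinstance() into PEP 604 unions, and the union spelling is slower at runtime and wordier, hence the opt-out. A minimal illustration, not part of the commit:

    x = 1
    # Tuple form, which this config keeps:
    assert isinstance(x, (int, str))
    # PEP 604 union form, which UP038 would enforce (Python 3.10+):
    assert isinstance(x, int | str)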

Tools/peg_generator/pegen/__main__.py

@@ -10,7 +10,6 @@
 import time
 import token
 import traceback
-from typing import Tuple
 
 from pegen.grammar import Grammar
 from pegen.parser import Parser
@@ -21,7 +20,7 @@
 def generate_c_code(
     args: argparse.Namespace,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
     from pegen.build import build_c_parser_and_generator
 
     verbose = args.verbose
@@ -50,7 +49,7 @@ def generate_c_code(
 def generate_python_code(
     args: argparse.Namespace,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
     from pegen.build import build_python_parser_and_generator
 
     verbose = args.verbose
@@ -188,7 +187,7 @@ def main() -> None:
 if __name__ == "__main__":
-    if sys.version_info < (3, 8):
+    if sys.version_info < (3, 8):  # noqa: UP036
         print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
         sys.exit(1)
     main()
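The new `# noqa: UP036` suppresses pyupgrade's outdated-version-block rule so the deliberate `sys.version_info < (3, 8)` guard survives autofixing. The annotation churn here and throughout the files below follows the same PEP 585/604 modernization; a before/after sketch of the pattern, illustrative rather than taken from the diff:

    from typing import Optional, Tuple

    def before(x: Optional[int]) -> Tuple[int, str]: ...

    # Builtin generics (3.9+) and union syntax (3.10+, or any version
    # with `from __future__ import annotations`):
    def after(x: int | None) -> tuple[int, str]: ...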

Tools/peg_generator/pegen/ast_dump.py

@@ -6,7 +6,7 @@
 TODO: Remove the above-described hack.
 """
 
-from typing import Any, Optional, Tuple
+from typing import Any
 
 
 def ast_dump(
@@ -14,9 +14,9 @@ def ast_dump(
     annotate_fields: bool = True,
     include_attributes: bool = False,
     *,
-    indent: Optional[str] = None,
+    indent: str | None = None,
 ) -> str:
-    def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
+    def _format(node: Any, level: int = 0) -> tuple[str, bool]:
         if indent is not None:
             level += 1
             prefix = "\n" + indent * level
@@ -41,7 +41,7 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
                 value, simple = _format(value, level)
                 allsimple = allsimple and simple
                 if keywords:
-                    args.append("%s=%s" % (name, value))
+                    args.append(f"{name}={value}")
                 else:
                     args.append(value)
             if include_attributes and node._attributes:
@@ -54,16 +54,16 @@ def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
                        continue
                    value, simple = _format(value, level)
                    allsimple = allsimple and simple
-                   args.append("%s=%s" % (name, value))
+                   args.append(f"{name}={value}")
            if allsimple and len(args) <= 3:
-               return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args
-           return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False
+               return "{}({})".format(node.__class__.__name__, ", ".join(args)), not args
+           return f"{node.__class__.__name__}({prefix}{sep.join(args)})", False
        elif isinstance(node, list):
            if not node:
                return "[]", True
-           return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False
+           return f"[{prefix}{sep.join(_format(x, level)[0] for x in node)}]", False
        return repr(node), True
 
    if all(cls.__name__ != "AST" for cls in node.__class__.__mro__):
-       raise TypeError("expected AST, got %r" % node.__class__.__name__)
+       raise TypeError(f"expected AST, got {node.__class__.__name__!r}")
    return _format(node)[0]
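The %-formatting to f-string conversions above are behavior-preserving: in this function name and value are already strings at those points, and both spellings call str() on their operands, so the results are identical. A quick check:

    name, value = "lineno", "1"
    assert "%s=%s" % (name, value) == f"{name}={value}"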

Tools/peg_generator/pegen/build.py

@@ -6,7 +6,7 @@
 import sysconfig
 import tempfile
 import tokenize
-from typing import IO, Any, Dict, List, Optional, Set, Tuple
+from typing import IO, Any
 
 from pegen.c_generator import CParserGenerator
 from pegen.grammar import Grammar
@@ -18,11 +18,11 @@
 MOD_DIR = pathlib.Path(__file__).resolve().parent
 
-TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
+TokenDefinitions = tuple[dict[int, str], dict[str, int], set[str]]
 Incomplete = Any  # TODO: install `types-setuptools` and remove this alias
 
 
-def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
+def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> list[str]:
     flags = sysconfig.get_config_var(compiler_flags)
     py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
     if flags is None or py_flags_nodist is None:
@@ -71,11 +71,11 @@ def fixup_build_ext(cmd: Incomplete) -> None:
 def compile_c_extension(
     generated_source_path: str,
-    build_dir: Optional[str] = None,
+    build_dir: str | None = None,
     verbose: bool = False,
     keep_asserts: bool = True,
     disable_optimization: bool = False,
-    library_dir: Optional[str] = None,
+    library_dir: str | None = None,
 ) -> pathlib.Path:
     """Compile the generated source for a parser generator into an extension module.
@@ -93,11 +93,10 @@ def compile_c_extension(
     """
     import setuptools.command.build_ext
     import setuptools.logging
-    from setuptools import Extension, Distribution
-    from setuptools.modified import newer_group
+    from setuptools import Distribution, Extension
     from setuptools._distutils.ccompiler import new_compiler
     from setuptools._distutils.sysconfig import customize_compiler
+    from setuptools.modified import newer_group
 
     if verbose:
         setuptools.logging.set_threshold(logging.DEBUG)
@@ -241,7 +240,7 @@ def compile_c_extension(
 def build_parser(
     grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
-) -> Tuple[Grammar, Parser, Tokenizer]:
+) -> tuple[Grammar, Parser, Tokenizer]:
     with open(grammar_file) as file:
         tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
         parser = GrammarParser(tokenizer, verbose=verbose_parser)
@@ -292,7 +291,7 @@ def build_c_generator(
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> ParserGenerator:
-    with open(tokens_file, "r") as tok_file:
+    with open(tokens_file) as tok_file:
         all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
     with open(output_file, "w") as file:
         gen: ParserGenerator = CParserGenerator(
@@ -333,7 +332,7 @@ def build_c_parser_and_generator(
     verbose_c_extension: bool = False,
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
     """Generate rules, C parser, tokenizer, parser generator for a given grammar
 
     Args:
@@ -373,7 +372,7 @@ def build_python_parser_and_generator(
     verbose_tokenizer: bool = False,
     verbose_parser: bool = False,
     skip_actions: bool = False,
-) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
     """Generate rules, python parser, tokenizer, parser generator for a given grammar
 
     Args:

Tools/peg_generator/pegen/c_generator.py

@@ -1,9 +1,10 @@
 import ast
 import os.path
 import re
+from collections.abc import Callable
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import IO, Any, Callable, Dict, List, Optional, Set, Text, Tuple
+from typing import IO, Any
 
 from pegen import grammar
 from pegen.grammar import (
@@ -86,13 +87,13 @@ class NodeTypes(Enum):
 @dataclass
 class FunctionCall:
     function: str
-    arguments: List[Any] = field(default_factory=list)
-    assigned_variable: Optional[str] = None
-    assigned_variable_type: Optional[str] = None
-    return_type: Optional[str] = None
-    nodetype: Optional[NodeTypes] = None
+    arguments: list[Any] = field(default_factory=list)
+    assigned_variable: str | None = None
+    assigned_variable_type: str | None = None
+    return_type: str | None = None
+    nodetype: NodeTypes | None = None
     force_true: bool = False
-    comment: Optional[str] = None
+    comment: str | None = None
 
     def __str__(self) -> str:
         parts = []
@@ -124,14 +125,14 @@ class CCallMakerVisitor(GrammarVisitor):
     def __init__(
         self,
         parser_generator: ParserGenerator,
-        exact_tokens: Dict[str, int],
-        non_exact_tokens: Set[str],
+        exact_tokens: dict[str, int],
+        non_exact_tokens: set[str],
     ):
         self.gen = parser_generator
         self.exact_tokens = exact_tokens
         self.non_exact_tokens = non_exact_tokens
-        self.cache: Dict[str, str] = {}
-        self.cleanup_statements: List[str] = []
+        self.cache: dict[str, str] = {}
+        self.cleanup_statements: list[str] = []
 
     def keyword_helper(self, keyword: str) -> FunctionCall:
         return FunctionCall(
@@ -167,7 +168,7 @@ def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
             )
         return FunctionCall(
             assigned_variable=f"{name.lower()}_var",
-            function=f"_PyPegen_expect_token",
+            function="_PyPegen_expect_token",
             arguments=["p", name],
             nodetype=NodeTypes.GENERIC_TOKEN,
             return_type="Token *",
@@ -199,7 +200,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
            type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
-               function=f"_PyPegen_expect_token",
+               function="_PyPegen_expect_token",
                arguments=["p", type],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
@@ -271,7 +272,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall:
            type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
-               function=f"_PyPegen_expect_forced_token",
+               function="_PyPegen_expect_forced_token",
                arguments=["p", type, f'"{val}"'],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
@@ -283,7 +284,7 @@ def visit_Forced(self, node: Forced) -> FunctionCall:
            call.comment = None
            return FunctionCall(
                assigned_variable="_literal",
-               function=f"_PyPegen_expect_forced_result",
+               function="_PyPegen_expect_forced_result",
                arguments=["p", str(call), f'"{node.node.rhs!s}"'],
                return_type="void *",
                comment=f"forced_token=({node.node.rhs!s})",
@@ -306,7 +307,7 @@ def _generate_artificial_rule_call(
         node: Any,
         prefix: str,
         rule_generation_func: Callable[[], str],
-        return_type: Optional[str] = None,
+        return_type: str | None = None,
     ) -> FunctionCall:
         node_str = f"{node}"
         key = f"{prefix}_{node_str}"
@@ -377,10 +378,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
     def __init__(
         self,
         grammar: grammar.Grammar,
-        tokens: Dict[int, str],
-        exact_tokens: Dict[str, int],
-        non_exact_tokens: Set[str],
-        file: Optional[IO[Text]],
+        tokens: dict[int, str],
+        exact_tokens: dict[str, int],
+        non_exact_tokens: set[str],
+        file: IO[str] | None,
         debug: bool = False,
         skip_actions: bool = False,
     ):
@@ -391,7 +392,7 @@ def __init__(
         self._varname_counter = 0
         self.debug = debug
         self.skip_actions = skip_actions
-        self.cleanup_statements: List[str] = []
+        self.cleanup_statements: list[str] = []
 
     def add_level(self) -> None:
         self.print("if (p->level++ == MAXSTACK || _Py_ReachedRecursionLimitWithMargin(PyThreadState_Get(), 1)) {")
@@ -427,12 +428,12 @@ def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
         self.print(f"if ({error_var}) {{")
         with self.indent():
             self.print(f"goto {goto_target};")
-        self.print(f"}}")
+        self.print("}")
 
     def out_of_memory_return(
         self,
         expr: str,
-        cleanup_code: Optional[str] = None,
+        cleanup_code: str | None = None,
     ) -> None:
         self.print(f"if ({expr}) {{")
         with self.indent():
@@ -441,14 +442,14 @@ def out_of_memory_return(
             self.print("p->error_indicator = 1;")
             self.print("PyErr_NoMemory();")
             self.add_return("NULL")
-        self.print(f"}}")
+        self.print("}")
 
     def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
         self.print(f"if ({expr}) {{")
         with self.indent():
             self.print("PyErr_NoMemory();")
             self.print(f"goto {goto_target};")
-        self.print(f"}}")
+        self.print("}")
 
     def generate(self, filename: str) -> None:
         self.collect_rules()
@@ -491,8 +492,8 @@ def generate(self, filename: str) -> None:
         if trailer:
             self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
 
-    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
-        groups: Dict[int, List[Tuple[str, int]]] = {}
+    def _group_keywords_by_length(self) -> dict[int, list[tuple[str, int]]]:
+        groups: dict[int, list[tuple[str, int]]] = {}
         for keyword_str, keyword_type in self.keywords.items():
             length = len(keyword_str)
             if length in groups:
@@ -584,10 +585,10 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
                self.print("if (_raw == NULL || p->mark <= _resmark)")
                with self.indent():
                    self.print("break;")
-               self.print(f"_resmark = p->mark;")
+               self.print("_resmark = p->mark;")
                self.print("_res = _raw;")
            self.print("}")
-           self.print(f"p->mark = _resmark;")
+           self.print("p->mark = _resmark;")
            self.add_return("_res")
        self.print("}")
        self.print(f"static {result_type}")
@@ -643,7 +644,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
         if memoize:
             self.print("int _start_mark = p->mark;")
         self.print("void **_children = PyMem_Malloc(sizeof(void *));")
-        self.out_of_memory_return(f"!_children")
+        self.out_of_memory_return("!_children")
         self.print("Py_ssize_t _children_capacity = 1;")
         self.print("Py_ssize_t _n = 0;")
         if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
@@ -661,7 +662,7 @@ def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
                self.add_return("NULL")
            self.print("}")
        self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
-       self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
+       self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
        self.print("for (Py_ssize_t i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
        self.print("PyMem_Free(_children);")
        if memoize and node.name:
@@ -715,7 +716,7 @@ def visit_NamedItem(self, node: NamedItem) -> None:
             self.print(call)
 
     def visit_Rhs(
-        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
+        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: str | None
     ) -> None:
         if is_loop:
             assert len(node.alts) == 1
@@ -734,7 +735,7 @@ def join_conditions(self, keyword: str, node: Any) -> None:
             self.visit(item)
         self.print(")")
 
-    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
+    def emit_action(self, node: Alt, cleanup_code: str | None = None) -> None:
         self.print(f"_res = {node.action};")
 
         self.print("if (_res == NULL && PyErr_Occurred()) {")
@@ -776,7 +777,7 @@ def emit_default_action(self, is_gather: bool, node: Alt) -> None:
     def emit_dummy_action(self) -> None:
         self.print("_res = _PyPegen_dummy_name(p);")
 
-    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
+    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
         self.join_conditions(keyword="if", node=node)
         self.print("{")
         # We have parsed successfully all the conditions for the option.
@@ -796,10 +797,10 @@ def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str])
             self.emit_default_action(is_gather, node)
 
         # As the current option has parsed correctly, do not continue with the rest.
-        self.print(f"goto done;")
+        self.print("goto done;")
         self.print("}")
 
-    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
+    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
         # Condition of the main body of the alternative
         self.join_conditions(keyword="while", node=node)
         self.print("{")
@@ -823,7 +824,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
-               self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
+               self.out_of_memory_return("!_new_children", cleanup_code="PyMem_Free(_children);")
               self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
@@ -831,7 +832,7 @@ def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -
         self.print("}")
 
     def visit_Alt(
-        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
+        self, node: Alt, is_loop: bool, is_gather: bool, rulename: str | None
     ) -> None:
         if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
             self.print(f"if (p->call_invalid_rules) {{ // {node}")
@@ -875,7 +876,7 @@ def visit_Alt(
             self.print("}")
         self.print("}")
 
-    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
+    def collect_vars(self, node: Alt) -> dict[str | None, str | None]:
         types = {}
         with self.local_variable_context():
             for item in node.items:
@@ -883,7 +884,7 @@ def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
                types[name] = type
         return types
 
-    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
+    def add_var(self, node: NamedItem) -> tuple[str | None, str | None]:
         call = self.callmakervisitor.generate_call(node.item)
         name = node.name if node.name else call.assigned_variable
         if name is not None:
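Several edits above simply drop the f prefix from strings that contain no placeholders (pyflakes rule F541, covered by the "F" selection in the new config); the prefix is inert there, so the literals compare equal:

    assert f"_PyPegen_expect_token" == "_PyPegen_expect_token"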

Tools/peg_generator/pegen/first_sets.py

@@ -3,7 +3,6 @@
 import argparse
 import pprint
 import sys
-from typing import Dict, Set
 
 from pegen.build import build_parser
 from pegen.grammar import (
@@ -33,20 +32,20 @@
 class FirstSetCalculator(GrammarVisitor):
-    def __init__(self, rules: Dict[str, Rule]) -> None:
+    def __init__(self, rules: dict[str, Rule]) -> None:
         self.rules = rules
         self.nullables = compute_nullables(rules)
-        self.first_sets: Dict[str, Set[str]] = dict()
-        self.in_process: Set[str] = set()
+        self.first_sets: dict[str, set[str]] = dict()
+        self.in_process: set[str] = set()
 
-    def calculate(self) -> Dict[str, Set[str]]:
+    def calculate(self) -> dict[str, set[str]]:
         for name, rule in self.rules.items():
             self.visit(rule)
         return self.first_sets
 
-    def visit_Alt(self, item: Alt) -> Set[str]:
-        result: Set[str] = set()
-        to_remove: Set[str] = set()
+    def visit_Alt(self, item: Alt) -> set[str]:
+        result: set[str] = set()
+        to_remove: set[str] = set()
         for other in item.items:
             new_terminals = self.visit(other)
             if isinstance(other.item, NegativeLookahead):
@@ -71,34 +70,34 @@ def visit_Alt(self, item: Alt) -> Set[str]:
         return result
 
-    def visit_Cut(self, item: Cut) -> Set[str]:
+    def visit_Cut(self, item: Cut) -> set[str]:
         return set()
 
-    def visit_Group(self, item: Group) -> Set[str]:
+    def visit_Group(self, item: Group) -> set[str]:
         return self.visit(item.rhs)
 
-    def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
+    def visit_PositiveLookahead(self, item: Lookahead) -> set[str]:
         return self.visit(item.node)
 
-    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
+    def visit_NegativeLookahead(self, item: NegativeLookahead) -> set[str]:
         return self.visit(item.node)
 
-    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
+    def visit_NamedItem(self, item: NamedItem) -> set[str]:
         return self.visit(item.item)
 
-    def visit_Opt(self, item: Opt) -> Set[str]:
+    def visit_Opt(self, item: Opt) -> set[str]:
         return self.visit(item.node)
 
-    def visit_Gather(self, item: Gather) -> Set[str]:
+    def visit_Gather(self, item: Gather) -> set[str]:
         return self.visit(item.node)
 
-    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
+    def visit_Repeat0(self, item: Repeat0) -> set[str]:
         return self.visit(item.node)
 
-    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
+    def visit_Repeat1(self, item: Repeat1) -> set[str]:
         return self.visit(item.node)
 
-    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
+    def visit_NameLeaf(self, item: NameLeaf) -> set[str]:
         if item.value not in self.rules:
             return {item.value}
@@ -110,16 +109,16 @@ def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
         return self.first_sets[item.value]
 
-    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
+    def visit_StringLeaf(self, item: StringLeaf) -> set[str]:
         return {item.value}
 
-    def visit_Rhs(self, item: Rhs) -> Set[str]:
-        result: Set[str] = set()
+    def visit_Rhs(self, item: Rhs) -> set[str]:
+        result: set[str] = set()
         for alt in item.alts:
             result |= self.visit(alt)
         return result
 
-    def visit_Rule(self, item: Rule) -> Set[str]:
+    def visit_Rule(self, item: Rule) -> set[str]:
         if item.name in self.in_process:
             return set()
         elif item.name not in self.first_sets:
@@ -138,7 +137,7 @@ def main() -> None:
     try:
         grammar, parser, tokenizer = build_parser(args.grammar_file)
     except Exception as err:
-        print("ERROR: Failed to parse grammar file", file=sys.stderr)
+        print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
         sys.exit(1)
 
     firs_sets = FirstSetCalculator(grammar.rules).calculate()

Tools/peg_generator/pegen/grammar.py

@@ -1,15 +1,7 @@
 from __future__ import annotations
 
-from typing import (
-    AbstractSet,
-    Any,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Union,
-)
+from collections.abc import Iterable, Iterator, Set
+from typing import Any
 
 
 class GrammarError(Exception):
@@ -34,7 +26,7 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Any:
 class Grammar:
-    def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
+    def __init__(self, rules: Iterable[Rule], metas: Iterable[tuple[str, str | None]]):
         # Check if there are repeated rules in "rules"
         all_rules = {}
         for rule in rules:
@@ -66,7 +58,7 @@ def __iter__(self) -> Iterator[Rule]:
 class Rule:
-    def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
+    def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
         self.name = name
         self.type = type
         self.rhs = rhs
@@ -141,9 +133,9 @@ def __repr__(self) -> str:
 class Rhs:
-    def __init__(self, alts: List[Alt]):
+    def __init__(self, alts: list[Alt]):
         self.alts = alts
-        self.memo: Optional[Tuple[Optional[str], str]] = None
+        self.memo: tuple[str | None, str] | None = None
 
     def __str__(self) -> str:
         return " | ".join(str(alt) for alt in self.alts)
@@ -151,7 +143,7 @@ def __str__(self) -> str:
     def __repr__(self) -> str:
         return f"Rhs({self.alts!r})"
 
-    def __iter__(self) -> Iterator[List[Alt]]:
+    def __iter__(self) -> Iterator[list[Alt]]:
         yield self.alts
 
     @property
@@ -165,7 +157,7 @@ def can_be_inlined(self) -> bool:
 class Alt:
-    def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
+    def __init__(self, items: list[NamedItem], *, icut: int = -1, action: str | None = None):
         self.items = items
         self.icut = icut
         self.action = action
@@ -185,12 +177,12 @@ def __repr__(self) -> str:
             args.append(f"action={self.action!r}")
         return f"Alt({', '.join(args)})"
 
-    def __iter__(self) -> Iterator[List[NamedItem]]:
+    def __iter__(self) -> Iterator[list[NamedItem]]:
         yield self.items
 
 
 class NamedItem:
-    def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
+    def __init__(self, name: str | None, item: Item, type: str | None = None):
         self.name = name
         self.item = item
         self.type = type
@@ -271,7 +263,7 @@ class Repeat:
     def __init__(self, node: Plain):
         self.node = node
-        self.memo: Optional[Tuple[Optional[str], str]] = None
+        self.memo: tuple[str | None, str] | None = None
 
     def __iter__(self) -> Iterator[Plain]:
         yield self.node
@@ -334,12 +326,12 @@ def __init__(self) -> None:
         pass
 
     def __repr__(self) -> str:
-        return f"Cut()"
+        return "Cut()"
 
     def __str__(self) -> str:
-        return f"~"
+        return "~"
 
-    def __iter__(self) -> Iterator[Tuple[str, str]]:
+    def __iter__(self) -> Iterator[tuple[str, str]]:
         yield from ()
 
     def __eq__(self, other: object) -> bool:
@@ -347,15 +339,15 @@ def __eq__(self, other: object) -> bool:
             return NotImplemented
         return True
 
-    def initial_names(self) -> AbstractSet[str]:
+    def initial_names(self) -> Set[str]:
         return set()
 
 
-Plain = Union[Leaf, Group]
-Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
-RuleName = Tuple[str, Optional[str]]
-MetaTuple = Tuple[str, Optional[str]]
-MetaList = List[MetaTuple]
-RuleList = List[Rule]
-NamedItemList = List[NamedItem]
-LookaheadOrCut = Union[Lookahead, Cut]
+Plain = Leaf | Group
+Item = Plain | Opt | Repeat | Forced | Lookahead | Rhs | Cut
+RuleName = tuple[str, str | None]
+MetaTuple = tuple[str, str | None]
+MetaList = list[MetaTuple]
+RuleList = list[Rule]
+NamedItemList = list[NamedItem]
+LookaheadOrCut = Lookahead | Cut
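One subtlety in the alias rewrites: `from __future__ import annotations` defers evaluation of annotations only, while module-level assignments such as `Plain = Leaf | Group` still execute at import time, so they rely on the `|` operator between classes (Python 3.10+). A minimal sketch of the distinction:

    from __future__ import annotations

    class Leaf: ...
    class Group: ...

    Plain = Leaf | Group         # evaluated eagerly -> a types.UnionType object
    def f(x: Leaf | Group): ...  # annotation only; stored as a string, not evaluated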

Tools/peg_generator/pegen/grammar_visualizer.py

@@ -1,6 +1,7 @@
 import argparse
 import sys
-from typing import Any, Callable, Iterator
+from collections.abc import Callable, Iterator
+from typing import Any
 
 from pegen.build import build_parser
 from pegen.grammar import Grammar, Rule
@@ -52,7 +53,7 @@ def main() -> None:
     try:
         grammar, parser, tokenizer = build_parser(args.filename)
     except Exception as err:
-        print("ERROR: Failed to parse grammar file", file=sys.stderr)
+        print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
         sys.exit(1)
 
     visitor = ASTGrammarPrinter()

Tools/peg_generator/pegen/parser.py

@@ -5,7 +5,8 @@
 import tokenize
 import traceback
 from abc import abstractmethod
-from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
+from collections.abc import Callable
+from typing import Any, ClassVar, TypeVar, cast
 
 from pegen.tokenizer import Mark, Tokenizer, exact_token_types
@@ -74,12 +75,12 @@ def memoize_wrapper(self: "Parser", *args: object) -> Any:
 def memoize_left_rec(
-    method: Callable[["Parser"], Optional[T]]
-) -> Callable[["Parser"], Optional[T]]:
+    method: Callable[["Parser"], T | None]
+) -> Callable[["Parser"], T | None]:
     """Memoize a left-recursive symbol method."""
     method_name = method.__name__
 
-    def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]:
+    def memoize_left_rec_wrapper(self: "Parser") -> T | None:
         mark = self._mark()
         key = mark, method_name, ()
         # Fast path: cache hit, and not verbose.
@@ -160,15 +161,15 @@ def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]:
 class Parser:
     """Parsing base class."""
 
-    KEYWORDS: ClassVar[Tuple[str, ...]]
-    SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
+    KEYWORDS: ClassVar[tuple[str, ...]]
+    SOFT_KEYWORDS: ClassVar[tuple[str, ...]]
 
     def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
         self._tokenizer = tokenizer
         self._verbose = verbose
         self._level = 0
-        self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
+        self._cache: dict[tuple[Mark, str, tuple[Any, ...]], tuple[Any, Mark]] = {}
         # Integer tracking whether we are in a left recursive rule or not. Can be useful
         # for error reporting.
         self.in_recursive_rule = 0
@@ -185,28 +186,28 @@ def showpeek(self) -> str:
         return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
 
     @memoize
-    def name(self) -> Optional[tokenize.TokenInfo]:
+    def name(self) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.type == token.NAME and tok.string not in self.KEYWORDS:
             return self._tokenizer.getnext()
         return None
 
     @memoize
-    def number(self) -> Optional[tokenize.TokenInfo]:
+    def number(self) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.type == token.NUMBER:
             return self._tokenizer.getnext()
         return None
 
     @memoize
-    def string(self) -> Optional[tokenize.TokenInfo]:
+    def string(self) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.type == token.STRING:
             return self._tokenizer.getnext()
         return None
 
     @memoize
-    def fstring_start(self) -> Optional[tokenize.TokenInfo]:
+    def fstring_start(self) -> tokenize.TokenInfo | None:
         FSTRING_START = getattr(token, "FSTRING_START", None)
         if not FSTRING_START:
             return None
@@ -216,7 +217,7 @@ def fstring_start(self) -> Optional[tokenize.TokenInfo]:
         return None
 
     @memoize
-    def fstring_middle(self) -> Optional[tokenize.TokenInfo]:
+    def fstring_middle(self) -> tokenize.TokenInfo | None:
         FSTRING_MIDDLE = getattr(token, "FSTRING_MIDDLE", None)
         if not FSTRING_MIDDLE:
             return None
@@ -226,7 +227,7 @@ def fstring_middle(self) -> Optional[tokenize.TokenInfo]:
         return None
 
     @memoize
-    def fstring_end(self) -> Optional[tokenize.TokenInfo]:
+    def fstring_end(self) -> tokenize.TokenInfo | None:
         FSTRING_END = getattr(token, "FSTRING_END", None)
         if not FSTRING_END:
             return None
@@ -236,28 +237,28 @@ def fstring_end(self) -> Optional[tokenize.TokenInfo]:
         return None
 
     @memoize
-    def op(self) -> Optional[tokenize.TokenInfo]:
+    def op(self) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.type == token.OP:
             return self._tokenizer.getnext()
         return None
 
     @memoize
-    def type_comment(self) -> Optional[tokenize.TokenInfo]:
+    def type_comment(self) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.type == token.TYPE_COMMENT:
             return self._tokenizer.getnext()
         return None
 
     @memoize
-    def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
+    def soft_keyword(self) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
             return self._tokenizer.getnext()
         return None
 
     @memoize
-    def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
+    def expect(self, type: str) -> tokenize.TokenInfo | None:
         tok = self._tokenizer.peek()
         if tok.string == type:
             return self._tokenizer.getnext()
@@ -271,7 +272,7 @@ def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
             return self._tokenizer.getnext()
         return None
 
-    def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
+    def expect_forced(self, res: Any, expectation: str) -> tokenize.TokenInfo | None:
         if res is None:
             raise self.make_syntax_error(f"expected {expectation}")
         return res
@@ -293,7 +294,7 @@ def make_syntax_error(self, message: str, filename: str = "<unknown>") -> Syntax
         return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
 
 
-def simple_parser_main(parser_class: Type[Parser]) -> None:
+def simple_parser_main(parser_class: type[Parser]) -> None:
     argparser = argparse.ArgumentParser()
     argparser.add_argument(
         "-v",
@@ -330,7 +331,7 @@ def simple_parser_main(parser_class: Type[Parser]) -> None:
                endpos = 0
            else:
                endpos = file.tell()
-       except IOError:
+       except OSError:
            endpos = 0
        finally:
            if file is not sys.stdin:
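The IOError change is cosmetic: IOError has been an alias of OSError since Python 3.3 (ruff's UP024 rule normalizes the spelling), so the set of exceptions caught is unchanged:

    assert IOError is OSError  # true on all supported Pythons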

Tools/peg_generator/pegen/parser_generator.py

@@ -1,22 +1,10 @@
-import sys
 import ast
 import contextlib
 import re
+import sys
 from abc import abstractmethod
-from typing import (
-    IO,
-    AbstractSet,
-    Any,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Set,
-    Text,
-    Tuple,
-    Union,
-)
+from collections.abc import Iterable, Iterator, Set
+from typing import IO, Any
 
 from pegen import sccutils
 from pegen.grammar import (
@@ -44,7 +32,7 @@
 class RuleCollectorVisitor(GrammarVisitor):
     """Visitor that invokes a provided callmaker visitor with just the NamedItem nodes"""
 
-    def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
+    def __init__(self, rules: dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
         self.rules = rules
         self.callmaker = callmakervisitor
@@ -58,7 +46,7 @@ def visit_NamedItem(self, item: NamedItem) -> None:
 class KeywordCollectorVisitor(GrammarVisitor):
     """Visitor that collects all the keywords and soft keywords in the Grammar"""
 
-    def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
+    def __init__(self, gen: "ParserGenerator", keywords: dict[str, int], soft_keywords: set[str]):
         self.generator = gen
         self.keywords = keywords
         self.soft_keywords = soft_keywords
@@ -73,7 +61,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> None:
 class RuleCheckingVisitor(GrammarVisitor):
-    def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
+    def __init__(self, rules: dict[str, Rule], tokens: set[str]):
         self.rules = rules
         self.tokens = tokens
         # If python < 3.12 add the virtual fstring tokens
@@ -100,11 +88,11 @@ def visit_NamedItem(self, node: NamedItem) -> None:
 class ParserGenerator:
     callmakervisitor: GrammarVisitor
 
-    def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
+    def __init__(self, grammar: Grammar, tokens: set[str], file: IO[str] | None):
         self.grammar = grammar
         self.tokens = tokens
-        self.keywords: Dict[str, int] = {}
-        self.soft_keywords: Set[str] = set()
+        self.keywords: dict[str, int] = {}
+        self.soft_keywords: set[str] = set()
         self.rules = grammar.rules
         self.validate_rule_names()
         if "trailer" not in grammar.metas and "start" not in self.rules:
@@ -117,8 +105,8 @@ def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]])
         self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
         self.counter = 0  # For name_rule()/name_loop()
         self.keyword_counter = 499  # For keyword_type()
-        self.all_rules: Dict[str, Rule] = self.rules.copy()  # Rules + temporal rules
-        self._local_variable_stack: List[List[str]] = []
+        self.all_rules: dict[str, Rule] = self.rules.copy()  # Rules + temporal rules
+        self._local_variable_stack: list[list[str]] = []
 
     def validate_rule_names(self) -> None:
         for rule in self.rules:
@@ -132,7 +120,7 @@ def local_variable_context(self) -> Iterator[None]:
         self._local_variable_stack.pop()
 
     @property
-    def local_variable_names(self) -> List[str]:
+    def local_variable_names(self) -> list[str]:
         return self._local_variable_stack[-1]
 
     @abstractmethod
@@ -164,7 +152,7 @@ def collect_rules(self) -> None:
             keyword_collector.visit(rule)
 
         rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
-        done: Set[str] = set()
+        done: set[str] = set()
         while True:
             computed_rules = list(self.all_rules)
             todo = [i for i in computed_rules if i not in done]
@@ -229,10 +217,10 @@ def dedupe(self, name: str) -> str:
 class NullableVisitor(GrammarVisitor):
-    def __init__(self, rules: Dict[str, Rule]) -> None:
+    def __init__(self, rules: dict[str, Rule]) -> None:
         self.rules = rules
-        self.visited: Set[Any] = set()
-        self.nullables: Set[Union[Rule, NamedItem]] = set()
+        self.visited: set[Any] = set()
+        self.nullables: set[Rule | NamedItem] = set()
 
     def visit_Rule(self, rule: Rule) -> bool:
         if rule in self.visited:
@@ -294,7 +282,7 @@ def visit_StringLeaf(self, node: StringLeaf) -> bool:
         return not node.value
 
 
-def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
+def compute_nullables(rules: dict[str, Rule]) -> set[Any]:
     """Compute which rules in a grammar are nullable.
 
     Thanks to TatSu (tatsu/leftrec.py) for inspiration.
@@ -306,12 +294,12 @@ def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
 class InitialNamesVisitor(GrammarVisitor):
-    def __init__(self, rules: Dict[str, Rule]) -> None:
+    def __init__(self, rules: dict[str, Rule]) -> None:
         self.rules = rules
         self.nullables = compute_nullables(rules)
 
-    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
-        names: Set[str] = set()
+    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> set[Any]:
+        names: set[str] = set()
         for value in node:
             if isinstance(value, list):
                 for item in value:
@@ -320,33 +308,33 @@ def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[A
                 names |= self.visit(value, *args, **kwargs)
         return names
 
-    def visit_Alt(self, alt: Alt) -> Set[Any]:
-        names: Set[str] = set()
+    def visit_Alt(self, alt: Alt) -> set[Any]:
+        names: set[str] = set()
         for item in alt.items:
             names |= self.visit(item)
             if item not in self.nullables:
                 break
         return names
 
-    def visit_Forced(self, force: Forced) -> Set[Any]:
+    def visit_Forced(self, force: Forced) -> set[Any]:
         return set()
 
-    def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
+    def visit_LookAhead(self, lookahead: Lookahead) -> set[Any]:
         return set()
 
-    def visit_Cut(self, cut: Cut) -> Set[Any]:
+    def visit_Cut(self, cut: Cut) -> set[Any]:
         return set()
 
-    def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
+    def visit_NameLeaf(self, node: NameLeaf) -> set[Any]:
         return {node.value}
 
-    def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
+    def visit_StringLeaf(self, node: StringLeaf) -> set[Any]:
         return set()
 
 
 def compute_left_recursives(
-    rules: Dict[str, Rule]
-) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
+    rules: dict[str, Rule]
+) -> tuple[dict[str, Set[str]], list[Set[str]]]:
     graph = make_first_graph(rules)
     sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
     for scc in sccs:
@@ -374,7 +362,7 @@ def compute_left_recursives(
     return graph, sccs
 
 
-def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
+def make_first_graph(rules: dict[str, Rule]) -> dict[str, Set[str]]:
     """Compute the graph of left-invocations.
 
     There's an edge from A to B if A may invoke B at its initial
@@ -384,7 +372,7 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
     """
     initial_name_visitor = InitialNamesVisitor(rules)
     graph = {}
-    vertices: Set[str] = set()
+    vertices: set[str] = set()
     for rulename, rhs in rules.items():
         graph[rulename] = names = initial_name_visitor.visit(rhs)
         vertices |= names
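The AbstractSet rewrites follow the typing documentation: typing.AbstractSet is a deprecated alias of collections.abc.Set, the read-only set ABC, which is distinct from the builtin set type. A small sketch of the difference:

    from collections.abc import Set

    assert isinstance(frozenset({1}), Set)  # any set-like type satisfies the ABC
    assert not issubclass(Set, set)         # the ABC is not the concrete builtin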

View file

@ -1,6 +1,7 @@
import os.path import os.path
import token import token
from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple from collections.abc import Callable, Sequence
from typing import IO, Any
from pegen import grammar from pegen import grammar
from pegen.grammar import ( from pegen.grammar import (
@ -74,10 +75,10 @@ def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
def visit_Opt(self, node: Opt) -> bool: def visit_Opt(self, node: Opt) -> bool:
return self.visit(node.node) return self.visit(node.node)
def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]: def visit_Repeat(self, node: Repeat0) -> tuple[str, str]:
return self.visit(node.node) return self.visit(node.node)
def visit_Gather(self, node: Gather) -> Tuple[str, str]: def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self.visit(node.node) return self.visit(node.node)
def visit_Group(self, node: Group) -> bool: def visit_Group(self, node: Group) -> bool:
@ -93,9 +94,9 @@ def visit_Forced(self, node: Forced) -> bool:
class PythonCallMakerVisitor(GrammarVisitor): class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator): def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator self.gen = parser_generator
self.cache: Dict[str, Tuple[str, str]] = {} self.cache: dict[str, tuple[str, str]] = {}
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: def visit_NameLeaf(self, node: NameLeaf) -> tuple[str | None, str]:
name = node.value name = node.value
if name == "SOFT_KEYWORD": if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()" return "soft_keyword", "self.soft_keyword()"
@ -108,31 +109,31 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
return "_" + name.lower(), f"self.expect({name!r})" return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()" return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: def visit_StringLeaf(self, node: StringLeaf) -> tuple[str, str]:
return "literal", f"self.expect({node.value})" return "literal", f"self.expect({node.value})"
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: def visit_NamedItem(self, node: NamedItem) -> tuple[str | None, str]:
name, call = self.visit(node.item) name, call = self.visit(node.item)
if node.name: if node.name:
name = node.name name = node.name
return name, call return name, call
def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]: def lookahead_call_helper(self, node: Lookahead) -> tuple[str, str]:
name, call = self.visit(node.node) name, call = self.visit(node.node)
head, tail = call.split("(", 1) head, tail = call.split("(", 1)
assert tail[-1] == ")" assert tail[-1] == ")"
tail = tail[:-1] tail = tail[:-1]
return head, tail return head, tail
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]: def visit_PositiveLookahead(self, node: PositiveLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node) head, tail = self.lookahead_call_helper(node)
return None, f"self.positive_lookahead({head}, {tail})" return None, f"self.positive_lookahead({head}, {tail})"
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]: def visit_NegativeLookahead(self, node: NegativeLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node) head, tail = self.lookahead_call_helper(node)
return None, f"self.negative_lookahead({head}, {tail})" return None, f"self.negative_lookahead({head}, {tail})"
def visit_Opt(self, node: Opt) -> Tuple[str, str]: def visit_Opt(self, node: Opt) -> tuple[str, str]:
name, call = self.visit(node.node) name, call = self.visit(node.node)
# Note trailing comma (the call may already have one comma # Note trailing comma (the call may already have one comma
# at the end, for example when rules have both repeat0 and optional # at the end, for example when rules have both repeat0 and optional
@ -148,7 +149,7 @@ def _generate_artificial_rule_call(
prefix: str, prefix: str,
call_by_name_func: Callable[[str], str], call_by_name_func: Callable[[str], str],
rule_generation_func: Callable[[], str], rule_generation_func: Callable[[], str],
) -> Tuple[str, str]: ) -> tuple[str, str]:
node_str = f"{node}" node_str = f"{node}"
key = f"{prefix}_{node_str}" key = f"{prefix}_{node_str}"
if key in self.cache: if key in self.cache:
@ -159,7 +160,7 @@ def _generate_artificial_rule_call(
self.cache[key] = name, call self.cache[key] = name, call
return self.cache[key] return self.cache[key]
def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: def visit_Rhs(self, node: Rhs) -> tuple[str, str]:
if len(node.alts) == 1 and len(node.alts[0].items) == 1: if len(node.alts) == 1 and len(node.alts[0].items) == 1:
return self.visit(node.alts[0].items[0]) return self.visit(node.alts[0].items[0])
@ -170,7 +171,7 @@ def visit_Rhs(self, node: Rhs) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_rhs(node), lambda: self.gen.artificial_rule_from_rhs(node),
) )
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: def visit_Repeat0(self, node: Repeat0) -> tuple[str, str]:
return self._generate_artificial_rule_call( return self._generate_artificial_rule_call(
node, node,
"repeat0", "repeat0",
@ -178,7 +179,7 @@ def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False), lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False),
) )
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: def visit_Repeat1(self, node: Repeat1) -> tuple[str, str]:
return self._generate_artificial_rule_call( return self._generate_artificial_rule_call(
node, node,
"repeat1", "repeat1",
@ -186,7 +187,7 @@ def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True), lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True),
) )
def visit_Gather(self, node: Gather) -> Tuple[str, str]: def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self._generate_artificial_rule_call( return self._generate_artificial_rule_call(
node, node,
"gather", "gather",
@@ -194,13 +195,13 @@ def visit_Gather(self, node: Gather) -> Tuple[str, str]:
lambda: self.gen.artificial_rule_from_gather(node), lambda: self.gen.artificial_rule_from_gather(node),
) )
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: def visit_Group(self, node: Group) -> tuple[str | None, str]:
return self.visit(node.rhs) return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> Tuple[str, str]: def visit_Cut(self, node: Cut) -> tuple[str, str]:
return "cut", "True" return "cut", "True"
def visit_Forced(self, node: Forced) -> Tuple[str, str]: def visit_Forced(self, node: Forced) -> tuple[str, str]:
if isinstance(node.node, Group): if isinstance(node.node, Group):
_, val = self.visit(node.node.rhs) _, val = self.visit(node.node.rhs)
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')" return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
@@ -215,10 +216,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
def __init__( def __init__(
self, self,
grammar: grammar.Grammar, grammar: grammar.Grammar,
file: Optional[IO[Text]], file: IO[str] | None,
tokens: Set[str] = set(token.tok_name.values()), tokens: set[str] = set(token.tok_name.values()),
location_formatting: Optional[str] = None, location_formatting: str | None = None,
unreachable_formatting: Optional[str] = None, unreachable_formatting: str | None = None,
): ):
tokens.add("SOFT_KEYWORD") tokens.add("SOFT_KEYWORD")
super().__init__(grammar, tokens, file) super().__init__(grammar, tokens, file)
@@ -355,7 +356,7 @@ def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
if is_loop: if is_loop:
self.print(f"children.append({action})") self.print(f"children.append({action})")
self.print(f"mark = self._mark()") self.print("mark = self._mark()")
else: else:
if "UNREACHABLE" in action: if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting) action = action.replace("UNREACHABLE", self.unreachable_formatting)
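
The hunks above all apply the same two modernizations: PEP 585 builtin generics (Tuple[...] -> tuple[...]) and PEP 604 unions (Optional[X] -> X | None). A minimal before/after sketch of the pattern, with illustrative names not taken from this diff:

from typing import Optional, Tuple

def visit_old(node: object) -> Tuple[Optional[str], str]:  # typing aliases
    return None, ""

def visit_new(node: object) -> tuple[str | None, str]:  # builtin generics, PEP 604 union
    return None, ""
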

Tools/peg_generator/pegen/sccutils.py

@@ -1,11 +1,11 @@
# Adapted from mypy (mypy/build.py) under the MIT license. # Adapted from mypy (mypy/build.py) under the MIT license.
from typing import * from collections.abc import Iterable, Iterator, Set
def strongly_connected_components( def strongly_connected_components(
vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]] vertices: Set[str], edges: dict[str, Set[str]]
) -> Iterator[AbstractSet[str]]: ) -> Iterator[Set[str]]:
"""Compute Strongly Connected Components of a directed graph. """Compute Strongly Connected Components of a directed graph.
Args: Args:
@@ -20,12 +20,12 @@ def strongly_connected_components(
From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/. From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/.
""" """
identified: Set[str] = set() identified: set[str] = set()
stack: List[str] = [] stack: list[str] = []
index: Dict[str, int] = {} index: dict[str, int] = {}
boundaries: List[int] = [] boundaries: list[int] = []
def dfs(v: str) -> Iterator[Set[str]]: def dfs(v: str) -> Iterator[set[str]]:
index[v] = len(stack) index[v] = len(stack)
stack.append(v) stack.append(v)
boundaries.append(index[v]) boundaries.append(index[v])
@@ -50,8 +50,8 @@ def dfs(v: str) -> Iterator[Set[str]]:
def topsort( def topsort(
data: Dict[AbstractSet[str], Set[AbstractSet[str]]] data: dict[Set[str], set[Set[str]]]
) -> Iterable[AbstractSet[AbstractSet[str]]]: ) -> Iterable[Set[Set[str]]]:
"""Topological sort. """Topological sort.
Args: Args:
@@ -94,12 +94,12 @@ def topsort(
break break
yield ready yield ready
data = {item: (dep - ready) for item, dep in data.items() if item not in ready} data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
assert not data, "A cyclic dependency exists amongst %r" % data assert not data, f"A cyclic dependency exists amongst {data}"
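
As a hedged illustration of what topsort() consumes and yields: each key is an SCC, each value is the set of SCCs it depends on, and batches are yielded dependencies-first. The data below is made up, and frozenset is assumed only because the Set[str] keys must be hashable:

from pegen.sccutils import topsort

a, b = frozenset({"a"}), frozenset({"b"})
deps = {a: set(), b: {a}}  # 'b' depends on 'a'
for ready in topsort(deps):
    print(ready)  # first {frozenset({'a'})}, then {frozenset({'b'})}
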
def find_cycles_in_scc( def find_cycles_in_scc(
graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str graph: dict[str, Set[str]], scc: Set[str], start: str
) -> Iterable[List[str]]: ) -> Iterable[list[str]]:
"""Find cycles in SCC emanating from start. """Find cycles in SCC emanating from start.
Yields lists of the form ['A', 'B', 'C', 'A'], which means there's Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
@@ -117,7 +117,7 @@ def find_cycles_in_scc(
assert start in graph assert start in graph
# Recursive helper that yields cycles. # Recursive helper that yields cycles.
def dfs(node: str, path: List[str]) -> Iterator[List[str]]: def dfs(node: str, path: list[str]) -> Iterator[list[str]]:
if node in path: if node in path:
yield path + [node] yield path + [node]
return return
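
A small usage sketch for the SCC helper (toy graph, not from the commit; assumes pegen is importable, e.g. with Tools/peg_generator on sys.path):

from pegen.sccutils import strongly_connected_components

edges = {"a": {"b"}, "b": {"a"}, "c": {"a"}}
for scc in strongly_connected_components(vertices={"a", "b", "c"}, edges=edges):
    print(scc)  # {'a', 'b'} comes out before {'c'}, which points into it
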

Tools/peg_generator/pegen/testutil.py

@@ -6,7 +6,7 @@
import textwrap import textwrap
import token import token
import tokenize import tokenize
from typing import IO, Any, Dict, Final, Optional, Type, cast from typing import IO, Any, Final, cast
from pegen.build import compile_c_extension from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator from pegen.c_generator import CParserGenerator
@@ -23,19 +23,19 @@
} }
def generate_parser(grammar: Grammar) -> Type[Parser]: def generate_parser(grammar: Grammar) -> type[Parser]:
# Generate a parser. # Generate a parser.
out = io.StringIO() out = io.StringIO()
genr = PythonParserGenerator(grammar, out) genr = PythonParserGenerator(grammar, out)
genr.generate("<string>") genr.generate("<string>")
# Load the generated parser class. # Load the generated parser class.
ns: Dict[str, Any] = {} ns: dict[str, Any] = {}
exec(out.getvalue(), ns) exec(out.getvalue(), ns)
return ns["GeneratedParser"] return ns["GeneratedParser"]
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any: def run_parser(file: IO[bytes], parser_class: type[Parser], *, verbose: bool = False) -> Any:
# Run a parser on a file (stream). # Run a parser on a file (stream).
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515 tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515
parser = parser_class(tokenizer, verbose=verbose) parser = parser_class(tokenizer, verbose=verbose)
@@ -46,7 +46,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
def parse_string( def parse_string(
source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False source: str, parser_class: type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any: ) -> Any:
# Run the parser on a string. # Run the parser on a string.
if dedent: if dedent:
@@ -55,7 +55,7 @@ def parse_string(
return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515 return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515
def make_parser(source: str) -> Type[Parser]: def make_parser(source: str) -> type[Parser]:
# Combine parse_string() and generate_parser(). # Combine parse_string() and generate_parser().
grammar = parse_string(source, GrammarParser) grammar = parse_string(source, GrammarParser)
return generate_parser(grammar) return generate_parser(grammar)
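
Together these helpers give a pure-Python grammar-to-parser round trip. A minimal sketch under the same importability assumption, using a hypothetical toy grammar:

from pegen.testutil import make_parser, parse_string

parser_class = make_parser("start: NAME NEWLINE")  # toy grammar, for illustration
node = parse_string("x\n", parser_class)  # parse a string with the generated class
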
@@ -86,7 +86,7 @@ def generate_parser_c_extension(
grammar: Grammar, grammar: Grammar,
path: pathlib.PurePath, path: pathlib.PurePath,
debug: bool = False, debug: bool = False,
library_dir: Optional[str] = None, library_dir: str | None = None,
) -> Any: ) -> Any:
"""Generate a parser c extension for the given grammar in the given path """Generate a parser c extension for the given grammar in the given path

Tools/peg_generator/pegen/tokenizer.py

@@ -1,6 +1,6 @@
import token import token
import tokenize import tokenize
from typing import Dict, Iterator, List from collections.abc import Iterator
Mark = int # NewType('Mark', int) Mark = int # NewType('Mark', int)
@@ -8,7 +8,11 @@
def shorttok(tok: tokenize.TokenInfo) -> str: def shorttok(tok: tokenize.TokenInfo) -> str:
return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}" formatted = (
f"{tok.start[0]}.{tok.start[1]}: "
f"{token.tok_name[tok.type]}:{tok.string!r}"
)
return f"{formatted:<25.25}"
class Tokenizer: class Tokenizer:
@@ -17,7 +21,7 @@ class Tokenizer:
This is pretty tied to Python's syntax. This is pretty tied to Python's syntax.
""" """
_tokens: List[tokenize.TokenInfo] _tokens: list[tokenize.TokenInfo]
def __init__( def __init__(
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
@@ -26,7 +30,7 @@ def __init__(
self._tokens = [] self._tokens = []
self._index = 0 self._index = 0
self._verbose = verbose self._verbose = verbose
self._lines: Dict[int, str] = {} self._lines: dict[int, str] = {}
self._path = path self._path = path
if verbose: if verbose:
self.report(False, False) self.report(False, False)
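
A hedged sketch of constructing the wrapper by hand; getnext() is assumed here as the accessor the generated parsers use to consume tokens (it is not part of this diff):

import io
import tokenize

from pegen.tokenizer import Tokenizer

src = io.StringIO("pass\n")
tok = Tokenizer(tokenize.generate_tokens(src.readline), verbose=False)
first = tok.getnext()  # the NAME token for 'pass'
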
@@ -72,7 +76,7 @@ def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
break break
return tok return tok
def get_lines(self, line_numbers: List[int]) -> List[str]: def get_lines(self, line_numbers: list[int]) -> list[str]:
"""Retrieve source lines corresponding to line numbers.""" """Retrieve source lines corresponding to line numbers."""
if self._lines: if self._lines:
lines = self._lines lines = self._lines

Tools/peg_generator/pegen/validator.py

@@ -1,5 +1,3 @@
from typing import Optional
from pegen import grammar from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
@@ -11,7 +9,7 @@ class ValidationError(Exception):
class GrammarValidator(GrammarVisitor): class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar) -> None: def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar self.grammar = grammar
self.rulename: Optional[str] = None self.rulename: str | None = None
def validate_rule(self, rulename: str, node: Rule) -> None: def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename self.rulename = rulename
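
A closing sketch of driving the validator; it assumes Grammar.rules is the usual name-to-Rule mapping, which is not shown in this diff:

from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.testutil import parse_string
from pegen.validator import GrammarValidator

grammar = parse_string("start: NAME NEWLINE", GrammarParser)
validator = GrammarValidator(grammar)
for name, rule in grammar.rules.items():
    validator.validate_rule(name, rule)  # subclasses hook visit_* to raise ValidationError
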