Mirror of https://github.com/python/cpython.git (synced 2025-10-31 13:41:24 +00:00)

	Refactor scripts in Tools/peg_generator/scripts (GH-20401)
(cherry picked from commit ba6fd87e41)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
			
			
This commit is contained in:

parent d5e7348e41
commit 18f1226884

7 changed files with 146 additions and 147 deletions

@@ -80,14 +80,15 @@ _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
    static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
    char *the_string;
    char *filename = "<string>";
    char *mode_str = "exec";
    int oldparser = 0;
    int ast = 1;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
            &the_string, &filename, &mode_str, &oldparser)) {
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
            &the_string, &filename, &mode_str, &oldparser, &ast)) {
        return NULL;
    }

@@ -110,7 +111,14 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
        return NULL;
    }

    PyObject *result = PyAST_mod2obj(mod);
    PyObject *result;
    if (ast) {
        result = PyAST_mod2obj(mod);
    }
    else {
        Py_INCREF(Py_None);
        result = Py_None;
    }
    PyArena_Free(arena);
    return result;
}

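The hunk above adds an `ast` keyword to `_peg_parser.parse_string`: when it is false, the string is still parsed but the `PyAST_mod2obj` conversion is skipped and `None` is returned. A minimal usage sketch from Python (the sample source string is invented for illustration):

import _peg_parser

source = "x = 1 + 2"  # sample input, invented for illustration

# Default behaviour: parse and convert the internal mod_ty into an ast.AST.
tree = _peg_parser.parse_string(source)

# New in this commit: parse only, skip the AST conversion; returns None.
assert _peg_parser.parse_string(source, ast=False) is None

# The flag combines with the existing oldparser switch.
assert _peg_parser.parse_string(source, oldparser=True, ast=False) is None
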
@@ -70,23 +70,21 @@ stats: peg_extension/parse.c data/xxl.py
time: time_compile

time_compile: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile

time_parse: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse

time_old: time_old_compile

time_old_compile: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile

time_old_parse: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse

time_peg_dir: venv
	$(VENVPYTHON) scripts/test_parse_directory.py \
		--grammar-file $(GRAMMAR) \
		--tokens-file $(TOKENS) \
		-d $(TESTDIR) \
		$(TESTFLAGS) \
		--exclude "*/failset/*" \
@@ -95,12 +93,8 @@ time_peg_dir: venv

time_stdlib: $(CPYTHON) venv
	$(VENVPYTHON) scripts/test_parse_directory.py \
		--grammar-file $(GRAMMAR) \
		--tokens-file $(TOKENS) \
		-d $(CPYTHON) \
		$(TESTFLAGS) \
		--exclude "*/test2to3/*" \
		--exclude "*/test2to3/**/*" \
		--exclude "*/bad*" \
		--exclude "*/lib2to3/tests/data/*"

@@ -24,7 +24,7 @@
argparser.add_argument(
    "--parser",
    action="store",
    choices=["pegen", "cpython"],
    choices=["new", "old"],
    default="pegen",
    help="Which parser to benchmark (default is pegen)",
)
@@ -40,7 +40,12 @@
command_compile = subcommands.add_parser(
    "compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
command_parse = subcommands.add_parser(
    "parse", help="Benchmark parsing and generating an ast.AST"
)
command_notree = subcommands.add_parser(
    "notree", help="Benchmark parsing and dumping the tree"
)


def benchmark(func):
@@ -62,7 +67,7 @@ def wrapper(*args):

@benchmark
def time_compile(source, parser):
    if parser == "cpython":
    if parser == "old":
        return _peg_parser.compile_string(
            source,
            oldparser=True,
@@ -73,32 +78,40 @@ def time_compile(source, parser):

@benchmark
def time_parse(source, parser):
    if parser == "cpython":
    if parser == "old":
        return _peg_parser.parse_string(source, oldparser=True)
    else:
        return _peg_parser.parse_string(source)


@benchmark
def time_notree(source, parser):
    if parser == "old":
        return _peg_parser.parse_string(source, oldparser=True, ast=False)
    else:
        return _peg_parser.parse_string(source, ast=False)


def run_benchmark_xxl(subcommand, parser, source):
    if subcommand == "compile":
        time_compile(source, parser)
    elif subcommand == "parse":
        time_parse(source, parser)
    elif subcommand == "notree":
        time_notree(source, parser)


def run_benchmark_stdlib(subcommand, parser):
    modes = {"compile": 2, "parse": 1, "notree": 0}
    for _ in range(3):
        parse_directory(
            "../../Lib",
            "../../Grammar/python.gram",
            "../../Grammar/Tokens",
            verbose=False,
            excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
            skip_actions=False,
            tree_arg=0,
            short=True,
            mode=2 if subcommand == "compile" else 1,
            parser=parser,
            mode=modes[subcommand],
            oldparser=(parser == "old"),
        )

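The benchmark script now accepts --parser=new/--parser=old and gains a notree subcommand that exercises the ast=False path. The @benchmark decorator itself is defined outside this hunk; the sketch below is a hypothetical stand-in for it (the perf_counter loop and repetition count are assumptions, not the script's actual implementation), shown only to illustrate what the new time_notree benchmark measures:

import time
import _peg_parser


def benchmark_sketch(func):
    # Hypothetical stand-in for benchmark.py's @benchmark decorator:
    # run the wrapped callable a few times and report the best wall-clock time.
    def wrapper(*args):
        best = float("inf")
        for _ in range(5):
            t0 = time.perf_counter()
            func(*args)
            best = min(best, time.perf_counter() - t0)
        print(f"{func.__name__}: {best:.3f}s")
    return wrapper


@benchmark_sketch
def time_notree(source, parser):
    # Mirrors the new `notree` mode: parse but skip building an ast.AST.
    if parser == "old":
        return _peg_parser.parse_string(source, oldparser=True, ast=False)
    return _peg_parser.parse_string(source, ast=False)


time_notree("x = 1\n" * 10_000, "new")
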
@@ -42,6 +42,13 @@
)

argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
argparser.add_argument(
    "-s",
    "--start",
    choices=["exec", "eval", "single"],
    default="exec",
    help="Choose the grammar's start rule (exec, eval or single)",
)
argparser.add_argument("grammar_file", help="The grammar file to graph")


@@ -91,19 +98,15 @@ def main() -> None:
        references[name] = set(references_for_item(rule))

    # Flatten the start node if has only a single reference
    root_node = "start"
    if start := references["start"]:
        if len(start) == 1:
            root_node = list(start)[0]
            del references["start"]
    root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]

    print("digraph g1 {")
    print('\toverlap="scale";')  # Force twopi to scale the graph to avoid overlaps
    print(f'\troot="{root_node}";')
    print(f"\t{root_node} [color=green, shape=circle]")
    print(f"\t{root_node} [color=green, shape=circle];")
    for name, refs in references.items():
        if refs:  # Ignore empty sets
            print(f"\t{name} -> {','.join(refs)};")
        for ref in refs:
            print(f"\t{name} -> {ref};")
    print("}")

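To see what the patched grammar-graphing script emits, here is a toy reproduction of its DOT output logic with a made-up references mapping (the rule names and edges are invented for illustration; only the root-node lookup and the one-edge-per-line print format mirror the script):

# Toy reproduction of the output loop after this change.
references = {
    "file": {"statements"},      # invented example data
    "statements": {"statement"},
    "statement": set(),
}
start = "exec"
root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[start]

print("digraph g1 {")
print('\toverlap="scale";')  # force twopi to scale the graph to avoid overlaps
print(f'\troot="{root_node}";')
print(f"\t{root_node} [color=green, shape=circle];")
for name, refs in references.items():
    for ref in refs:  # one edge per reference (the new behaviour)
        print(f"\t{name} -> {ref};")
print("}")
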
@@ -41,7 +41,13 @@
parser.add_argument(
    "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
)
parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
parser.add_argument(
    "-p",
    "--parser",
    choices=["new", "old"],
    default="new",
    help="choose the parser to use"
)
parser.add_argument(
    "-m",
    "--multiline",
@@ -84,19 +90,18 @@ def print_parse(source: str, verbose: bool = False) -> None:

def main() -> None:
    args = parser.parse_args()
    if args.diff and not args.grammar_file:
        parser.error("-d/--diff requires -g/--grammar-file")
    new_parser = args.parser == "new"
    if args.multiline:
        sep = "\n"
    else:
        sep = " "
    program = sep.join(args.program)
    if args.grammar_file:
    if new_parser:
        tree = _peg_parser.parse_string(program)

        if args.diff:
            a = tree
            b = _peg_parser.parse_string(program, oldparser=True)
            a = _peg_parser.parse_string(program, oldparser=True)
            b = tree
            diff = diff_trees(a, b, args.verbose)
            if diff:
                for line in diff:
@@ -104,11 +109,11 @@ def main() -> None:
            else:
                print("# Trees are the same")
        else:
            print(f"# Parsed using {args.grammar_file}")
            print("# Parsed using the new parser")
            print(format_tree(tree, args.verbose))
    else:
        tree = _peg_parser.parse_string(program, oldparser=True)
        print("# Parse using the old parser")
        print("# Parsed using the old parser")
        print(format_tree(tree, args.verbose))

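With the -p/--parser flag, the -d/--diff path now always compares the old parser's output against the new parser's tree instead of requiring a grammar file. A rough interactive equivalent, using the standard library's ast.dump in place of the script's diff_trees/format_tree helpers (which are defined outside this hunk), might look like:

import ast
import _peg_parser

program = "x = 1 if True else 2"  # sample input, invented for illustration

new_tree = _peg_parser.parse_string(program)                  # the new PEG parser
old_tree = _peg_parser.parse_string(program, oldparser=True)  # the old parser

if ast.dump(old_tree) == ast.dump(new_tree):
    print("# Trees are the same")
else:
    print(ast.dump(new_tree, indent=4))
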
@@ -11,7 +11,7 @@
from glob import glob
from pathlib import PurePath

from typing import List, Optional, Any
from typing import List, Optional, Any, Tuple

sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
@@ -22,13 +22,15 @@
FAIL = "\033[91m"
ENDC = "\033[0m"

COMPILE = 2
PARSE = 1
NOTREE = 0

argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
argparser.add_argument("--grammar-file", help="Grammar file path")
argparser.add_argument("--tokens-file", help="Tokens file path")
argparser.add_argument(
    "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
)
@@ -38,9 +40,6 @@
argparser.add_argument(
    "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
argparser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)
@@ -113,92 +112,35 @@ def compare_trees(
    return 1


def parse_directory(
    directory: str,
    grammar_file: str,
    tokens_file: str,
    verbose: bool,
    excluded_files: List[str],
    skip_actions: bool,
    tree_arg: int,
    short: bool,
    mode: int,
    parser: str,
) -> int:
    if parser == "cpython" and (tree_arg or mode == 0):
        print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
        return 1

    if not directory:
        print("You must specify a directory of files to test.", file=sys.stderr)
        return 1

    if grammar_file and tokens_file:
        if not os.path.exists(grammar_file):
            print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
            return 1
    else:
        print(
            "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
    t0 = time.time()
    if mode == COMPILE:
        result = _peg_parser.compile_string(
            source,
            filename=file,
            oldparser=oldparser,
        )
    else:
        result = _peg_parser.parse_string(
            source,
            filename=file,
            oldparser=oldparser,
            ast=(mode == PARSE),
        )

    if tree_arg:
        assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0

    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        should_exclude_file = False
        for pattern in excluded_files:
            if PurePath(file).match(pattern):
                should_exclude_file = True
                break

        if not should_exclude_file:
            with tokenize.open(file) as f:
                source = f.read()
            try:
                t0 = time.time()
                if mode == 2:
                    result = _peg_parser.compile_string(
                        source,
                        filename=file,
                        oldparser=parser == "cpython",
                    )
                else:
                    result = _peg_parser.parse_string(
                        source,
                        filename=file,
                        oldparser=parser == "cpython"
                    )
                t1 = time.time()
                total_seconds += (t1 - t0)
                if tree_arg:
                    trees[file] = result
                if not short:
                    report_status(succeeded=True, file=file, verbose=verbose)
            except Exception as error:
                try:
                    _peg_parser.parse_string(source, mode="exec", oldparser=True)
                except Exception:
                    if not short:
                        print(f"File {file} cannot be parsed by either pegen or the ast module.")
                else:
                    report_status(
                        succeeded=False, file=file, verbose=verbose, error=error, short=short
                    )
                    errors += 1
            files.append(file)
    t1 = time.time()
    return result, t1 - t0


def is_parsing_failure(source: str) -> bool:
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        return False
    return True


def generate_time_stats(files, total_seconds) -> None:
    total_files = len(files)

    total_bytes = 0
    total_lines = 0
    for file in files:
@@ -217,6 +159,57 @@ def parse_directory(
            f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
        )


def parse_directory(
    directory: str,
    verbose: bool,
    excluded_files: List[str],
    tree_arg: int,
    short: bool,
    mode: int,
    oldparser: bool,
) -> int:
    if tree_arg:
        assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"

    if oldparser and tree_arg:
        print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
        return 1

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0

    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        if any(PurePath(file).match(pattern) for pattern in excluded_files):
            continue

        with tokenize.open(file) as f:
            source = f.read()

        try:
            result, dt = parse_file(source, file, mode, oldparser)
            total_seconds += dt
            if tree_arg:
                trees[file] = result
            report_status(succeeded=True, file=file, verbose=verbose, short=short)
        except SyntaxError as error:
            if is_parsing_failure(source):
                print(f"File {file} cannot be parsed by either parser.")
            else:
                report_status(
                    succeeded=False, file=file, verbose=verbose, error=error, short=short
                )
                errors += 1
        files.append(file)

    t1 = time.time()

    generate_time_stats(files, total_seconds)
    if short:
        print_memstats()

@@ -240,26 +233,20 @@ def parse_directory(
def main() -> None:
    args = argparser.parse_args()
    directory = args.directory
    grammar_file = args.grammar_file
    tokens_file = args.tokens_file
    verbose = args.verbose
    excluded_files = args.exclude
    skip_actions = args.skip_actions
    tree = args.tree
    short = args.short
    mode = 1 if args.tree else 2
    sys.exit(
        parse_directory(
            directory,
            grammar_file,
            tokens_file,
            verbose,
            excluded_files,
            skip_actions,
            tree,
            short,
            mode,
            "pegen",
            oldparser=False,
        )
    )

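After this refactor, parse_directory no longer takes grammar/tokens paths, a skip_actions flag, or a parser string; callers pick one of the COMPILE/PARSE/NOTREE modes and pass oldparser instead. A sketch of a call under the new signature (the directory and exclude globs are placeholders, and the import is an assumption based on how test_pypi_packages.py refers to the module):

import test_parse_directory  # module name as referenced in test_pypi_packages.py; exact import path may differ

exit_code = test_parse_directory.parse_directory(
    "../../Lib",                                          # placeholder directory of .py files
    verbose=False,
    excluded_files=["*/bad*", "*/lib2to3/tests/data/*"],  # placeholder exclude globs
    tree_arg=0,
    short=True,
    mode=test_parse_directory.PARSE,                      # build an ast.AST for each file
    oldparser=False,                                      # exercise the new PEG parser
)
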
@@ -57,22 +57,11 @@ def find_dirname(package_name: str) -> str:
def run_tests(dirname: str, tree: int) -> int:
    return test_parse_directory.parse_directory(
        dirname,
        HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
        HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
        verbose=False,
        excluded_files=[
            "*/failset/*",
            "*/failset/**",
            "*/failset/**/*",
            "*/test2to3/*",
            "*/test2to3/**/*",
            "*/bad*",
            "*/lib2to3/tests/data/*",
        ],
        skip_actions=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=1,
        mode=1 if tree else 0,
        parser="pegen",
    )

Miss Islington (bot)