mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	bpo-40939: Clean and adapt the peg_generator directory after deleting the old parser (GH-20822)
This commit is contained in:
		
							parent
							
								
									b4282dd150
								
							
						
					
					
						commit
						756180b4bf
					
				
					 6 changed files with 30 additions and 304 deletions
				
			
		|  | @ -22,7 +22,7 @@ data/xxl.py: | |||
| 
 | ||||
| build: peg_extension/parse.c | ||||
| 
 | ||||
| peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py | ||||
| peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py | ||||
| 	$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension | ||||
| 
 | ||||
| clean: | ||||
|  | @ -70,18 +70,10 @@ stats: peg_extension/parse.c data/xxl.py | |||
| time: time_compile | ||||
| 
 | ||||
| time_compile: venv data/xxl.py | ||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile | ||||
| 	$(VENVPYTHON) scripts/benchmark.py --target=xxl compile | ||||
| 
 | ||||
| time_parse: venv data/xxl.py | ||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse | ||||
| 
 | ||||
| time_old: time_old_compile | ||||
| 
 | ||||
| time_old_compile: venv data/xxl.py | ||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile | ||||
| 
 | ||||
| time_old_parse: venv data/xxl.py | ||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse | ||||
| 	$(VENVPYTHON) scripts/benchmark.py --target=xxl parse | ||||
| 
 | ||||
| time_peg_dir: venv | ||||
| 	$(VENVPYTHON) scripts/test_parse_directory.py \
 | ||||
|  |  | |||
|  | @ -41,9 +41,7 @@ def main(): | |||
|         "grammar", type=str, help="The file with the grammar definition in PEG format" | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "tokens_file", | ||||
|         type=argparse.FileType("r"), | ||||
|         help="The file with the token definitions" | ||||
|         "tokens_file", type=argparse.FileType("r"), help="The file with the token definitions" | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "keyword_file", | ||||
|  | @ -61,9 +59,7 @@ def main(): | |||
|     gen.collect_todo() | ||||
| 
 | ||||
|     with args.keyword_file as thefile: | ||||
|         all_keywords = sorted( | ||||
|             list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS | ||||
|         ) | ||||
|         all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS) | ||||
| 
 | ||||
|         keywords = ",\n    ".join(map(repr, all_keywords)) | ||||
|         thefile.write(TEMPLATE.format(keywords=keywords)) | ||||
|  |  | |||
|  | @ -6,13 +6,13 @@ | |||
| import os | ||||
| from time import time | ||||
| 
 | ||||
| import _peg_parser | ||||
| 
 | ||||
| try: | ||||
|     import memory_profiler | ||||
| except ModuleNotFoundError: | ||||
|     print("Please run `make venv` to create a virtual environment and install" | ||||
|           " all the dependencies, before running this script.") | ||||
|     print( | ||||
|         "Please run `make venv` to create a virtual environment and install" | ||||
|         " all the dependencies, before running this script." | ||||
|     ) | ||||
|     sys.exit(1) | ||||
| 
 | ||||
| sys.path.insert(0, os.getcwd()) | ||||
|  | @ -21,13 +21,6 @@ | |||
| argparser = argparse.ArgumentParser( | ||||
|     prog="benchmark", description="Reproduce the various pegen benchmarks" | ||||
| ) | ||||
| argparser.add_argument( | ||||
|     "--parser", | ||||
|     action="store", | ||||
|     choices=["new", "old"], | ||||
|     default="pegen", | ||||
|     help="Which parser to benchmark (default is pegen)", | ||||
| ) | ||||
| argparser.add_argument( | ||||
|     "--target", | ||||
|     action="store", | ||||
|  | @ -40,12 +33,7 @@ | |||
| command_compile = subcommands.add_parser( | ||||
|     "compile", help="Benchmark parsing and compiling to bytecode" | ||||
| ) | ||||
| command_parse = subcommands.add_parser( | ||||
|     "parse", help="Benchmark parsing and generating an ast.AST" | ||||
| ) | ||||
| command_notree = subcommands.add_parser( | ||||
|     "notree", help="Benchmark parsing and dumping the tree" | ||||
| ) | ||||
| command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") | ||||
| 
 | ||||
| 
 | ||||
| def benchmark(func): | ||||
|  | @ -66,59 +54,37 @@ def wrapper(*args): | |||
| 
 | ||||
| 
 | ||||
| @benchmark | ||||
| def time_compile(source, parser): | ||||
|     if parser == "old": | ||||
|         return _peg_parser.compile_string( | ||||
|             source, | ||||
|             oldparser=True, | ||||
|         ) | ||||
|     else: | ||||
|         return _peg_parser.compile_string(source) | ||||
| def time_compile(source): | ||||
|     return compile(source, "<string>", "exec") | ||||
| 
 | ||||
| 
 | ||||
| @benchmark | ||||
| def time_parse(source, parser): | ||||
|     if parser == "old": | ||||
|         return _peg_parser.parse_string(source, oldparser=True) | ||||
|     else: | ||||
|         return _peg_parser.parse_string(source) | ||||
| def time_parse(source): | ||||
|     return ast.parse(source) | ||||
| 
 | ||||
| 
 | ||||
| @benchmark | ||||
| def time_notree(source, parser): | ||||
|     if parser == "old": | ||||
|         return _peg_parser.parse_string(source, oldparser=True, ast=False) | ||||
|     else: | ||||
|         return _peg_parser.parse_string(source, ast=False) | ||||
| 
 | ||||
| 
 | ||||
| def run_benchmark_xxl(subcommand, parser, source): | ||||
| def run_benchmark_xxl(subcommand, source): | ||||
|     if subcommand == "compile": | ||||
|         time_compile(source, parser) | ||||
|         time_compile(source) | ||||
|     elif subcommand == "parse": | ||||
|         time_parse(source, parser) | ||||
|     elif subcommand == "notree": | ||||
|         time_notree(source, parser) | ||||
|         time_parse(source) | ||||
| 
 | ||||
| 
 | ||||
| def run_benchmark_stdlib(subcommand, parser): | ||||
|     modes = {"compile": 2, "parse": 1, "notree": 0} | ||||
| def run_benchmark_stdlib(subcommand): | ||||
|     modes = {"compile": 2, "parse": 1} | ||||
|     for _ in range(3): | ||||
|         parse_directory( | ||||
|             "../../Lib", | ||||
|             verbose=False, | ||||
|             excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], | ||||
|             tree_arg=0, | ||||
|             short=True, | ||||
|             mode=modes[subcommand], | ||||
|             oldparser=(parser == "old"), | ||||
|         ) | ||||
| 
 | ||||
| 
 | ||||
| def main(): | ||||
|     args = argparser.parse_args() | ||||
|     subcommand = args.subcommand | ||||
|     parser = args.parser | ||||
|     target = args.target | ||||
| 
 | ||||
|     if subcommand is None: | ||||
|  | @ -127,9 +93,9 @@ def main(): | |||
|     if target == "xxl": | ||||
|         with open(os.path.join("data", "xxl.py"), "r") as f: | ||||
|             source = f.read() | ||||
|             run_benchmark_xxl(subcommand, parser, source) | ||||
|             run_benchmark_xxl(subcommand, source) | ||||
|     elif target == "stdlib": | ||||
|         run_benchmark_stdlib(subcommand, parser) | ||||
|         run_benchmark_stdlib(subcommand) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|  |  | |||
|  | @ -14,8 +14,7 @@ | |||
| Usage: python -m scripts.find_max_nesting | ||||
| """ | ||||
| import sys | ||||
| 
 | ||||
| from _peg_parser import parse_string | ||||
| import ast | ||||
| 
 | ||||
| GRAMMAR_FILE = "data/python.gram" | ||||
| INITIAL_NESTING_DEPTH = 10 | ||||
|  | @ -28,9 +27,8 @@ | |||
| 
 | ||||
| def check_nested_expr(nesting_depth: int) -> bool: | ||||
|     expr = f"{'(' * nesting_depth}0{')' * nesting_depth}" | ||||
| 
 | ||||
|     try: | ||||
|         parse_string(expr) | ||||
|         ast.parse(expr) | ||||
|         print(f"Nesting depth of {nesting_depth} is successful") | ||||
|         return True | ||||
|     except Exception as err: | ||||
|  |  | |||
|  | @ -1,121 +0,0 @@ | |||
| #!/usr/bin/env python3.8 | ||||
| 
 | ||||
| """Show the parse tree for a given program, nicely formatted. | ||||
| 
 | ||||
| Example: | ||||
| 
 | ||||
| $ scripts/show_parse.py a+b | ||||
| Module( | ||||
|     body=[ | ||||
|         Expr( | ||||
|             value=BinOp( | ||||
|                 left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load()) | ||||
|             ) | ||||
|         ) | ||||
|     ], | ||||
|     type_ignores=[], | ||||
| ) | ||||
| $ | ||||
| 
 | ||||
| Use -v to show line numbers and column offsets. | ||||
| 
 | ||||
| The formatting is done using black.  You can also import this module | ||||
| and call one of its functions. | ||||
| """ | ||||
| 
 | ||||
| import argparse | ||||
| import ast | ||||
| import difflib | ||||
| import os | ||||
| import sys | ||||
| import tempfile | ||||
| 
 | ||||
| import _peg_parser | ||||
| 
 | ||||
| from typing import List | ||||
| 
 | ||||
| sys.path.insert(0, os.getcwd()) | ||||
| from pegen.ast_dump import ast_dump | ||||
| 
 | ||||
| parser = argparse.ArgumentParser() | ||||
| parser.add_argument( | ||||
|     "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)" | ||||
| ) | ||||
| parser.add_argument( | ||||
|     "-p", | ||||
|     "--parser", | ||||
|     choices=["new", "old"], | ||||
|     default="new", | ||||
|     help="choose the parser to use" | ||||
| ) | ||||
| parser.add_argument( | ||||
|     "-m", | ||||
|     "--multiline", | ||||
|     action="store_true", | ||||
|     help="concatenate program arguments using newline instead of space", | ||||
| ) | ||||
| parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers") | ||||
| parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)") | ||||
| 
 | ||||
| 
 | ||||
| def format_tree(tree: ast.AST, verbose: bool = False) -> str: | ||||
|     with tempfile.NamedTemporaryFile("w+") as tf: | ||||
|         tf.write(ast_dump(tree, include_attributes=verbose)) | ||||
|         tf.write("\n") | ||||
|         tf.flush() | ||||
|         cmd = f"black -q {tf.name}" | ||||
|         sts = os.system(cmd) | ||||
|         if sts: | ||||
|             raise RuntimeError(f"Command {cmd!r} failed with status 0x{sts:x}") | ||||
|         tf.seek(0) | ||||
|         return tf.read() | ||||
| 
 | ||||
| 
 | ||||
| def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]: | ||||
|     sa = format_tree(a, verbose) | ||||
|     sb = format_tree(b, verbose) | ||||
|     la = sa.splitlines() | ||||
|     lb = sb.splitlines() | ||||
|     return list(difflib.unified_diff(la, lb, "a", "b", lineterm="")) | ||||
| 
 | ||||
| 
 | ||||
| def show_parse(source: str, verbose: bool = False) -> str: | ||||
|     tree = _peg_parser.parse_string(source, oldparser=True) | ||||
|     return format_tree(tree, verbose).rstrip("\n") | ||||
| 
 | ||||
| 
 | ||||
| def print_parse(source: str, verbose: bool = False) -> None: | ||||
|     print(show_parse(source, verbose)) | ||||
| 
 | ||||
| 
 | ||||
| def main() -> None: | ||||
|     args = parser.parse_args() | ||||
|     new_parser = args.parser == "new" | ||||
|     if args.multiline: | ||||
|         sep = "\n" | ||||
|     else: | ||||
|         sep = " " | ||||
|     program = sep.join(args.program) | ||||
|     if new_parser: | ||||
|         tree = _peg_parser.parse_string(program) | ||||
| 
 | ||||
|         if args.diff: | ||||
|             a = _peg_parser.parse_string(program, oldparser=True) | ||||
|             b = tree | ||||
|             diff = diff_trees(a, b, args.verbose) | ||||
|             if diff: | ||||
|                 for line in diff: | ||||
|                     print(line) | ||||
|             else: | ||||
|                 print("# Trees are the same") | ||||
|         else: | ||||
|             print("# Parsed using the new parser") | ||||
|             print(format_tree(tree, args.verbose)) | ||||
|     else: | ||||
|         tree = _peg_parser.parse_string(program, oldparser=True) | ||||
|         print("# Parsed using the old parser") | ||||
|         print(format_tree(tree, args.verbose)) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
|  | @ -7,7 +7,6 @@ | |||
| import time | ||||
| import traceback | ||||
| import tokenize | ||||
| import _peg_parser | ||||
| from glob import glob | ||||
| from pathlib import PurePath | ||||
| 
 | ||||
|  | @ -16,7 +15,6 @@ | |||
| sys.path.insert(0, os.getcwd()) | ||||
| from pegen.ast_dump import ast_dump | ||||
| from pegen.testutil import print_memstats | ||||
| from scripts import show_parse | ||||
| 
 | ||||
| SUCCESS = "\033[92m" | ||||
| FAIL = "\033[91m" | ||||
|  | @ -40,9 +38,6 @@ | |||
| argparser.add_argument( | ||||
|     "-v", "--verbose", action="store_true", help="Display detailed errors for failures" | ||||
| ) | ||||
| argparser.add_argument( | ||||
|     "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0 | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| def report_status( | ||||
|  | @ -79,66 +74,13 @@ def report_status( | |||
|             print(f"  {str(error.__class__.__name__)}: {error}") | ||||
| 
 | ||||
| 
 | ||||
| def compare_trees( | ||||
|     actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False, | ||||
| ) -> int: | ||||
|     with open(file) as f: | ||||
|         expected_tree = _peg_parser.parse_string(f.read(), oldparser=True) | ||||
| 
 | ||||
|     expected_text = ast_dump(expected_tree, include_attributes=include_attributes) | ||||
|     actual_text = ast_dump(actual_tree, include_attributes=include_attributes) | ||||
|     if actual_text == expected_text: | ||||
|         if verbose: | ||||
|             print("Tree for {file}:") | ||||
|             print(show_parse.format_tree(actual_tree, include_attributes)) | ||||
|         return 0 | ||||
| 
 | ||||
|     print(f"Diffing ASTs for {file} ...") | ||||
| 
 | ||||
|     expected = show_parse.format_tree(expected_tree, include_attributes) | ||||
|     actual = show_parse.format_tree(actual_tree, include_attributes) | ||||
| 
 | ||||
|     if verbose: | ||||
|         print("Expected for {file}:") | ||||
|         print(expected) | ||||
|         print("Actual for {file}:") | ||||
|         print(actual) | ||||
|         print(f"Diff for {file}:") | ||||
| 
 | ||||
|     diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes) | ||||
|     for line in diff: | ||||
|         print(line) | ||||
| 
 | ||||
|     return 1 | ||||
| 
 | ||||
| 
 | ||||
| def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]: | ||||
| def parse_file(source: str, file: str) -> Tuple[Any, float]: | ||||
|     t0 = time.time() | ||||
|     if mode == COMPILE: | ||||
|         result = _peg_parser.compile_string( | ||||
|             source, | ||||
|             filename=file, | ||||
|             oldparser=oldparser, | ||||
|         ) | ||||
|     else: | ||||
|         result = _peg_parser.parse_string( | ||||
|             source, | ||||
|             filename=file, | ||||
|             oldparser=oldparser, | ||||
|             ast=(mode == PARSE), | ||||
|         ) | ||||
|     result = ast.parse(source, filename=file) | ||||
|     t1 = time.time() | ||||
|     return result, t1 - t0 | ||||
| 
 | ||||
| 
 | ||||
| def is_parsing_failure(source: str) -> bool: | ||||
|     try: | ||||
|         _peg_parser.parse_string(source, mode="exec", oldparser=True) | ||||
|     except SyntaxError: | ||||
|         return False | ||||
|     return True | ||||
| 
 | ||||
| 
 | ||||
| def generate_time_stats(files, total_seconds) -> None: | ||||
|     total_files = len(files) | ||||
|     total_bytes = 0 | ||||
|  | @ -160,27 +102,11 @@ def generate_time_stats(files, total_seconds) -> None: | |||
|         ) | ||||
| 
 | ||||
| 
 | ||||
| def parse_directory( | ||||
|     directory: str, | ||||
|     verbose: bool, | ||||
|     excluded_files: List[str], | ||||
|     tree_arg: int, | ||||
|     short: bool, | ||||
|     mode: int, | ||||
|     oldparser: bool, | ||||
| ) -> int: | ||||
|     if tree_arg: | ||||
|         assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees" | ||||
| 
 | ||||
|     if oldparser and tree_arg: | ||||
|         print("Cannot specify tree argument with the cpython parser.", file=sys.stderr) | ||||
|         return 1 | ||||
| 
 | ||||
| def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int: | ||||
|     # For a given directory, traverse files and attempt to parse each one | ||||
|     # - Output success/failure for each file | ||||
|     errors = 0 | ||||
|     files = [] | ||||
|     trees = {}  # Trees to compare (after everything else is done) | ||||
|     total_seconds = 0 | ||||
| 
 | ||||
|     for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): | ||||
|  | @ -192,39 +118,20 @@ def parse_directory( | |||
|             source = f.read() | ||||
| 
 | ||||
|         try: | ||||
|             result, dt = parse_file(source, file, mode, oldparser) | ||||
|             result, dt = parse_file(source, file) | ||||
|             total_seconds += dt | ||||
|             if tree_arg: | ||||
|                 trees[file] = result | ||||
|             report_status(succeeded=True, file=file, verbose=verbose, short=short) | ||||
|         except SyntaxError as error: | ||||
|             if is_parsing_failure(source): | ||||
|                 print(f"File {file} cannot be parsed by either parser.") | ||||
|             else: | ||||
|                 report_status( | ||||
|                     succeeded=False, file=file, verbose=verbose, error=error, short=short | ||||
|                 ) | ||||
|                 errors += 1 | ||||
|             report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short) | ||||
|             errors += 1 | ||||
|         files.append(file) | ||||
| 
 | ||||
|     t1 = time.time() | ||||
| 
 | ||||
|     generate_time_stats(files, total_seconds) | ||||
|     if short: | ||||
|         print_memstats() | ||||
| 
 | ||||
|     if errors: | ||||
|         print(f"Encountered {errors} failures.", file=sys.stderr) | ||||
| 
 | ||||
|     # Compare trees (the dict is empty unless -t is given) | ||||
|     compare_trees_errors = 0 | ||||
|     for file, tree in trees.items(): | ||||
|         if not short: | ||||
|             print("Comparing ASTs for", file) | ||||
|         if compare_trees(tree, file, verbose, tree_arg >= 2) == 1: | ||||
|             compare_trees_errors += 1 | ||||
| 
 | ||||
|     if errors or compare_trees_errors: | ||||
|         return 1 | ||||
| 
 | ||||
|     return 0 | ||||
|  | @ -235,20 +142,8 @@ def main() -> None: | |||
|     directory = args.directory | ||||
|     verbose = args.verbose | ||||
|     excluded_files = args.exclude | ||||
|     tree = args.tree | ||||
|     short = args.short | ||||
|     mode = 1 if args.tree else 2 | ||||
|     sys.exit( | ||||
|         parse_directory( | ||||
|             directory, | ||||
|             verbose, | ||||
|             excluded_files, | ||||
|             tree, | ||||
|             short, | ||||
|             mode, | ||||
|             oldparser=False, | ||||
|         ) | ||||
|     ) | ||||
|     sys.exit(parse_directory(directory, verbose, excluded_files, short)) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pablo Galindo
						Pablo Galindo