Mirror of https://github.com/python/cpython.git (synced 2025-10-31 13:41:24 +00:00)

	Refactor scripts in Tools/peg_generator/scripts (GH-20401)
(cherry picked from commit ba6fd87e41)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
			
			
This commit is contained in:

parent d5e7348e41
commit 18f1226884

7 changed files with 146 additions and 147 deletions

@@ -80,14 +80,15 @@ _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds)
PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{
    static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
    static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
    char *the_string;
    char *filename = "<string>";
    char *mode_str = "exec";
    int oldparser = 0;
    int ast = 1;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
            &the_string, &filename, &mode_str, &oldparser)) {
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
            &the_string, &filename, &mode_str, &oldparser, &ast)) {
        return NULL;
    }

@@ -110,7 +111,14 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
        return NULL;
    }

    PyObject *result = PyAST_mod2obj(mod);
    PyObject *result;
    if (ast) {
        result = PyAST_mod2obj(mod);
    }
    else {
        Py_INCREF(Py_None);
        result = Py_None;
    }
    PyArena_Free(arena);
    return result;
}

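The hunk above adds an `ast` keyword to `_peg_parser.parse_string`: when it is false, the string is still parsed but the `PyAST_mod2obj` conversion is skipped and `None` is returned. A minimal usage sketch from Python (the sample source string is invented for illustration):

import _peg_parser

source = "x = 1 + 2"  # sample input, invented for illustration

# Default behaviour: parse and convert the internal mod_ty into an ast.AST.
tree = _peg_parser.parse_string(source)

# New in this commit: parse only, skip the AST conversion; returns None.
assert _peg_parser.parse_string(source, ast=False) is None

# The flag combines with the existing oldparser switch.
assert _peg_parser.parse_string(source, oldparser=True, ast=False) is None
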
@@ -70,23 +70,21 @@ stats: peg_extension/parse.c data/xxl.py
time: time_compile

time_compile: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile

time_parse: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
	$(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse

time_old: time_old_compile

time_old_compile: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile

time_old_parse: venv data/xxl.py
	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
	$(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse

time_peg_dir: venv
	$(VENVPYTHON) scripts/test_parse_directory.py \
		--grammar-file $(GRAMMAR) \
		--tokens-file $(TOKENS) \
		-d $(TESTDIR) \
		$(TESTFLAGS) \
		--exclude "*/failset/*" \
@@ -95,12 +93,8 @@ time_peg_dir: venv

time_stdlib: $(CPYTHON) venv
	$(VENVPYTHON) scripts/test_parse_directory.py \
		--grammar-file $(GRAMMAR) \
		--tokens-file $(TOKENS) \
		-d $(CPYTHON) \
		$(TESTFLAGS) \
		--exclude "*/test2to3/*" \
		--exclude "*/test2to3/**/*" \
		--exclude "*/bad*" \
		--exclude "*/lib2to3/tests/data/*"

@@ -24,7 +24,7 @@
argparser.add_argument(
    "--parser",
    action="store",
    choices=["pegen", "cpython"],
    choices=["new", "old"],
    default="pegen",
    help="Which parser to benchmark (default is pegen)",
)
@@ -40,7 +40,12 @@
command_compile = subcommands.add_parser(
    "compile", help="Benchmark parsing and compiling to bytecode"
)
command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
command_parse = subcommands.add_parser(
    "parse", help="Benchmark parsing and generating an ast.AST"
)
command_notree = subcommands.add_parser(
    "notree", help="Benchmark parsing and dumping the tree"
)


def benchmark(func):
@@ -62,7 +67,7 @@ def wrapper(*args):

@benchmark
def time_compile(source, parser):
    if parser == "cpython":
    if parser == "old":
        return _peg_parser.compile_string(
            source,
            oldparser=True,
@@ -73,32 +78,40 @@ def time_compile(source, parser):

@benchmark
def time_parse(source, parser):
    if parser == "cpython":
    if parser == "old":
        return _peg_parser.parse_string(source, oldparser=True)
    else:
        return _peg_parser.parse_string(source)


@benchmark
def time_notree(source, parser):
    if parser == "old":
        return _peg_parser.parse_string(source, oldparser=True, ast=False)
    else:
        return _peg_parser.parse_string(source, ast=False)


def run_benchmark_xxl(subcommand, parser, source):
    if subcommand == "compile":
        time_compile(source, parser)
    elif subcommand == "parse":
        time_parse(source, parser)
    elif subcommand == "notree":
        time_notree(source, parser)


def run_benchmark_stdlib(subcommand, parser):
    modes = {"compile": 2, "parse": 1, "notree": 0}
    for _ in range(3):
        parse_directory(
            "../../Lib",
            "../../Grammar/python.gram",
            "../../Grammar/Tokens",
            verbose=False,
            excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
            skip_actions=False,
            tree_arg=0,
            short=True,
            mode=2 if subcommand == "compile" else 1,
            parser=parser,
            mode=modes[subcommand],
            oldparser=(parser == "old"),
        )

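The benchmark script now accepts --parser=new/--parser=old and gains a notree subcommand that exercises the ast=False path. The @benchmark decorator itself is defined outside this hunk; the sketch below is a hypothetical stand-in for it (the perf_counter loop and repetition count are assumptions, not the script's actual implementation), shown only to illustrate what the new time_notree benchmark measures:

import time
import _peg_parser


def benchmark_sketch(func):
    # Hypothetical stand-in for benchmark.py's @benchmark decorator:
    # run the wrapped callable a few times and report the best wall-clock time.
    def wrapper(*args):
        best = float("inf")
        for _ in range(5):
            t0 = time.perf_counter()
            func(*args)
            best = min(best, time.perf_counter() - t0)
        print(f"{func.__name__}: {best:.3f}s")
    return wrapper


@benchmark_sketch
def time_notree(source, parser):
    # Mirrors the new `notree` mode: parse but skip building an ast.AST.
    if parser == "old":
        return _peg_parser.parse_string(source, oldparser=True, ast=False)
    return _peg_parser.parse_string(source, ast=False)


time_notree("x = 1\n" * 10_000, "new")
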
@@ -42,6 +42,13 @@
)

argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
argparser.add_argument(
    "-s",
    "--start",
    choices=["exec", "eval", "single"],
    default="exec",
    help="Choose the grammar's start rule (exec, eval or single)",
)
argparser.add_argument("grammar_file", help="The grammar file to graph")


@@ -91,19 +98,15 @@ def main() -> None:
        references[name] = set(references_for_item(rule))

    # Flatten the start node if has only a single reference
    root_node = "start"
    if start := references["start"]:
        if len(start) == 1:
            root_node = list(start)[0]
            del references["start"]
    root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]

    print("digraph g1 {")
    print('\toverlap="scale";')  # Force twopi to scale the graph to avoid overlaps
    print(f'\troot="{root_node}";')
    print(f"\t{root_node} [color=green, shape=circle]")
    print(f"\t{root_node} [color=green, shape=circle];")
    for name, refs in references.items():
        if refs:  # Ignore empty sets
            print(f"\t{name} -> {','.join(refs)};")
        for ref in refs:
            print(f"\t{name} -> {ref};")
    print("}")

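To see what the patched grammar-graphing script emits, here is a toy reproduction of its DOT output logic with a made-up references mapping (the rule names and edges are invented for illustration; only the root-node lookup and the one-edge-per-line print format mirror the script):

# Toy reproduction of the output loop after this change.
references = {
    "file": {"statements"},      # invented example data
    "statements": {"statement"},
    "statement": set(),
}
start = "exec"
root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[start]

print("digraph g1 {")
print('\toverlap="scale";')  # force twopi to scale the graph to avoid overlaps
print(f'\troot="{root_node}";')
print(f"\t{root_node} [color=green, shape=circle];")
for name, refs in references.items():
    for ref in refs:  # one edge per reference (the new behaviour)
        print(f"\t{name} -> {ref};")
print("}")
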
@@ -41,7 +41,13 @@
parser.add_argument(
    "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
)
parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
parser.add_argument(
    "-p",
    "--parser",
    choices=["new", "old"],
    default="new",
    help="choose the parser to use"
)
parser.add_argument(
    "-m",
    "--multiline",
@@ -84,19 +90,18 @@ def print_parse(source: str, verbose: bool = False) -> None:

def main() -> None:
    args = parser.parse_args()
    if args.diff and not args.grammar_file:
        parser.error("-d/--diff requires -g/--grammar-file")
    new_parser = args.parser == "new"
    if args.multiline:
        sep = "\n"
    else:
        sep = " "
    program = sep.join(args.program)
    if args.grammar_file:
    if new_parser:
        tree = _peg_parser.parse_string(program)

        if args.diff:
            a = tree
            b = _peg_parser.parse_string(program, oldparser=True)
            a = _peg_parser.parse_string(program, oldparser=True)
            b = tree
            diff = diff_trees(a, b, args.verbose)
            if diff:
                for line in diff:
@@ -104,11 +109,11 @@ def main() -> None:
            else:
                print("# Trees are the same")
        else:
            print(f"# Parsed using {args.grammar_file}")
            print("# Parsed using the new parser")
            print(format_tree(tree, args.verbose))
    else:
        tree = _peg_parser.parse_string(program, oldparser=True)
        print("# Parse using the old parser")
        print("# Parsed using the old parser")
        print(format_tree(tree, args.verbose))

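With the -p/--parser flag, the -d/--diff path now always compares the old parser's output against the new parser's tree instead of requiring a grammar file. A rough interactive equivalent, using the standard library's ast.dump in place of the script's diff_trees/format_tree helpers (which are defined outside this hunk), might look like:

import ast
import _peg_parser

program = "x = 1 if True else 2"  # sample input, invented for illustration

new_tree = _peg_parser.parse_string(program)                  # the new PEG parser
old_tree = _peg_parser.parse_string(program, oldparser=True)  # the old parser

if ast.dump(old_tree) == ast.dump(new_tree):
    print("# Trees are the same")
else:
    print(ast.dump(new_tree, indent=4))
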
@@ -11,7 +11,7 @@
from glob import glob
from pathlib import PurePath

from typing import List, Optional, Any
from typing import List, Optional, Any, Tuple

sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
@@ -22,13 +22,15 @@
FAIL = "\033[91m"
ENDC = "\033[0m"

COMPILE = 2
PARSE = 1
NOTREE = 0

argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
argparser.add_argument("--grammar-file", help="Grammar file path")
argparser.add_argument("--tokens-file", help="Tokens file path")
argparser.add_argument(
    "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
)
@@ -38,9 +40,6 @@
argparser.add_argument(
    "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
argparser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
)
argparser.add_argument(
    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)
@@ -113,92 +112,35 @@ def compare_trees(
    return 1


def parse_directory(
    directory: str,
    grammar_file: str,
    tokens_file: str,
    verbose: bool,
    excluded_files: List[str],
    skip_actions: bool,
    tree_arg: int,
    short: bool,
    mode: int,
    parser: str,
) -> int:
    if parser == "cpython" and (tree_arg or mode == 0):
        print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
        return 1

    if not directory:
        print("You must specify a directory of files to test.", file=sys.stderr)
        return 1

    if grammar_file and tokens_file:
        if not os.path.exists(grammar_file):
            print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
            return 1
    else:
        print(
            "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
    t0 = time.time()
    if mode == COMPILE:
        result = _peg_parser.compile_string(
            source,
            filename=file,
            oldparser=oldparser,
        )
    else:
        result = _peg_parser.parse_string(
            source,
            filename=file,
            oldparser=oldparser,
            ast=(mode == PARSE),
        )

    if tree_arg:
        assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0

    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        should_exclude_file = False
        for pattern in excluded_files:
            if PurePath(file).match(pattern):
                should_exclude_file = True
                break

        if not should_exclude_file:
            with tokenize.open(file) as f:
                source = f.read()
            try:
                t0 = time.time()
                if mode == 2:
                    result = _peg_parser.compile_string(
                        source,
                        filename=file,
                        oldparser=parser == "cpython",
                    )
                else:
                    result = _peg_parser.parse_string(
                        source,
                        filename=file,
                        oldparser=parser == "cpython"
                    )
                t1 = time.time()
                total_seconds += (t1 - t0)
                if tree_arg:
                    trees[file] = result
                if not short:
                    report_status(succeeded=True, file=file, verbose=verbose)
            except Exception as error:
                try:
                    _peg_parser.parse_string(source, mode="exec", oldparser=True)
                except Exception:
                    if not short:
                        print(f"File {file} cannot be parsed by either pegen or the ast module.")
                else:
                    report_status(
                        succeeded=False, file=file, verbose=verbose, error=error, short=short
                    )
                    errors += 1
            files.append(file)
    t1 = time.time()
    return result, t1 - t0


def is_parsing_failure(source: str) -> bool:
    try:
        _peg_parser.parse_string(source, mode="exec", oldparser=True)
    except SyntaxError:
        return False
    return True


def generate_time_stats(files, total_seconds) -> None:
    total_files = len(files)

    total_bytes = 0
    total_lines = 0
    for file in files:
@@ -217,6 +159,57 @@ def parse_directory(
            f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
        )


def parse_directory(
    directory: str,
    verbose: bool,
    excluded_files: List[str],
    tree_arg: int,
    short: bool,
    mode: int,
    oldparser: bool,
) -> int:
    if tree_arg:
        assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"

    if oldparser and tree_arg:
        print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
        return 1

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0

    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
        # Only attempt to parse Python files and files that are not excluded
        if any(PurePath(file).match(pattern) for pattern in excluded_files):
            continue

        with tokenize.open(file) as f:
            source = f.read()

        try:
            result, dt = parse_file(source, file, mode, oldparser)
            total_seconds += dt
            if tree_arg:
                trees[file] = result
            report_status(succeeded=True, file=file, verbose=verbose, short=short)
        except SyntaxError as error:
            if is_parsing_failure(source):
                print(f"File {file} cannot be parsed by either parser.")
            else:
                report_status(
                    succeeded=False, file=file, verbose=verbose, error=error, short=short
                )
                errors += 1
        files.append(file)

    t1 = time.time()

    generate_time_stats(files, total_seconds)
    if short:
        print_memstats()

@@ -240,26 +233,20 @@ def parse_directory(
def main() -> None:
    args = argparser.parse_args()
    directory = args.directory
    grammar_file = args.grammar_file
    tokens_file = args.tokens_file
    verbose = args.verbose
    excluded_files = args.exclude
    skip_actions = args.skip_actions
    tree = args.tree
    short = args.short
    mode = 1 if args.tree else 2
    sys.exit(
        parse_directory(
            directory,
            grammar_file,
            tokens_file,
            verbose,
            excluded_files,
            skip_actions,
            tree,
            short,
            mode,
            "pegen",
            oldparser=False,
        )
    )

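After this refactor, parse_directory no longer takes grammar/tokens paths, a skip_actions flag, or a parser string; callers pick one of the COMPILE/PARSE/NOTREE modes and pass oldparser instead. A sketch of a call under the new signature (the directory and exclude globs are placeholders, and the import is an assumption based on how test_pypi_packages.py refers to the module):

import test_parse_directory  # module name as referenced in test_pypi_packages.py; exact import path may differ

exit_code = test_parse_directory.parse_directory(
    "../../Lib",                                          # placeholder directory of .py files
    verbose=False,
    excluded_files=["*/bad*", "*/lib2to3/tests/data/*"],  # placeholder exclude globs
    tree_arg=0,
    short=True,
    mode=test_parse_directory.PARSE,                      # build an ast.AST for each file
    oldparser=False,                                      # exercise the new PEG parser
)
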
@@ -57,22 +57,11 @@ def find_dirname(package_name: str) -> str:
def run_tests(dirname: str, tree: int) -> int:
    return test_parse_directory.parse_directory(
        dirname,
        HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
        HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
        verbose=False,
        excluded_files=[
            "*/failset/*",
            "*/failset/**",
            "*/failset/**/*",
            "*/test2to3/*",
            "*/test2to3/**/*",
            "*/bad*",
            "*/lib2to3/tests/data/*",
        ],
        skip_actions=False,
        excluded_files=[],
        tree_arg=tree,
        short=True,
        mode=1,
        mode=1 if tree else 0,
        parser="pegen",
    )

Miss Islington (bot)