mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235)
The scripts in `Tools/peg_generator/scripts` mostly assume that
`ast.parse` and `compile` use the old parser, since that was the
case while the scripts were being developed. They need to be
updated to always use the correct parser. To make this possible, `_peg_parser`
is being extended to support both parsing and compiling with either parser.
(cherry picked from commit 9645930b5b)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
			
			
This commit is contained in:
		
							parent
							
								
									318a18eb88
								
							
						
					
					
						commit
						3c6c86ab77
					
				
					 6 changed files with 151 additions and 187 deletions
				
			
		|  | @ -1,60 +1,9 @@ | ||||||
| #include <Python.h> | #include <Python.h> | ||||||
| #include "pegen_interface.h" | #include "pegen_interface.h" | ||||||
| 
 | 
 | ||||||
| PyObject * | static int | ||||||
| _Py_parse_file(PyObject *self, PyObject *args, PyObject *kwds) | _mode_str_to_int(char *mode_str) | ||||||
| { | { | ||||||
|     static char *keywords[] = {"file", "mode", NULL}; |  | ||||||
|     char *filename; |  | ||||||
|     char *mode_str = "exec"; |  | ||||||
| 
 |  | ||||||
|     if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", keywords, &filename, &mode_str)) { |  | ||||||
|         return NULL; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     int mode; |  | ||||||
|     if (strcmp(mode_str, "exec") == 0) { |  | ||||||
|         mode = Py_file_input; |  | ||||||
|     } |  | ||||||
|     else if (strcmp(mode_str, "single") == 0) { |  | ||||||
|         mode = Py_single_input; |  | ||||||
|     } |  | ||||||
|     else { |  | ||||||
|         return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'single'"); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     PyArena *arena = PyArena_New(); |  | ||||||
|     if (arena == NULL) { |  | ||||||
|         return NULL; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     PyCompilerFlags flags = _PyCompilerFlags_INIT; |  | ||||||
|     PyObject *result = NULL; |  | ||||||
| 
 |  | ||||||
|     mod_ty res = PyPegen_ASTFromFilename(filename, mode, &flags, arena); |  | ||||||
|     if (res == NULL) { |  | ||||||
|         goto error; |  | ||||||
|     } |  | ||||||
|     result = PyAST_mod2obj(res); |  | ||||||
| 
 |  | ||||||
| error: |  | ||||||
|     PyArena_Free(arena); |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| PyObject * |  | ||||||
| _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) |  | ||||||
| { |  | ||||||
|     static char *keywords[] = {"string", "mode", "oldparser", NULL}; |  | ||||||
|     char *the_string; |  | ||||||
|     char *mode_str = "exec"; |  | ||||||
|     int oldparser = 0; |  | ||||||
| 
 |  | ||||||
|     if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sp", keywords, |  | ||||||
|             &the_string, &mode_str, &oldparser)) { |  | ||||||
|         return NULL; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     int mode; |     int mode; | ||||||
|     if (strcmp(mode_str, "exec") == 0) { |     if (strcmp(mode_str, "exec") == 0) { | ||||||
|         mode = Py_file_input; |         mode = Py_file_input; | ||||||
|  | @ -66,39 +15,119 @@ _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) | ||||||
|         mode = Py_single_input; |         mode = Py_single_input; | ||||||
|     } |     } | ||||||
|     else { |     else { | ||||||
|  |         mode = -1; | ||||||
|  |     } | ||||||
|  |     return mode; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static mod_ty | ||||||
|  | _run_parser(char *str, char *filename, int mode, PyCompilerFlags *flags, PyArena *arena, int oldparser) | ||||||
|  | { | ||||||
|  |     mod_ty mod; | ||||||
|  |     if (!oldparser) { | ||||||
|  |         mod = PyPegen_ASTFromString(str, filename, mode, flags, arena); | ||||||
|  |     } | ||||||
|  |     else { | ||||||
|  |         mod = PyParser_ASTFromString(str, filename, mode, flags, arena); | ||||||
|  |     } | ||||||
|  |     return mod; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | PyObject * | ||||||
|  | _Py_compile_string(PyObject *self, PyObject *args, PyObject *kwds) | ||||||
|  | { | ||||||
|  |     static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL}; | ||||||
|  |     char *the_string; | ||||||
|  |     char *filename = "<string>"; | ||||||
|  |     char *mode_str = "exec"; | ||||||
|  |     int oldparser = 0; | ||||||
|  | 
 | ||||||
|  |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords, | ||||||
|  |             &the_string, &filename, &mode_str, &oldparser)) { | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     int mode = _mode_str_to_int(mode_str); | ||||||
|  |     if (mode == -1) { | ||||||
|         return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'"); |         return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'"); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     PyCompilerFlags flags = _PyCompilerFlags_INIT; | ||||||
|  |     flags.cf_flags = PyCF_IGNORE_COOKIE; | ||||||
|  | 
 | ||||||
|     PyArena *arena = PyArena_New(); |     PyArena *arena = PyArena_New(); | ||||||
|     if (arena == NULL) { |     if (arena == NULL) { | ||||||
|         return NULL; |         return NULL; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     PyObject *result = NULL; |     mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser); | ||||||
|  |     if (mod == NULL) { | ||||||
|  |         PyArena_Free(arena); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     PyObject *filename_ob = PyUnicode_DecodeFSDefault(filename); | ||||||
|  |     if (filename_ob == NULL) { | ||||||
|  |         PyArena_Free(arena); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     PyCodeObject *result = PyAST_CompileObject(mod, filename_ob, &flags, -1, arena); | ||||||
|  |     Py_XDECREF(filename_ob); | ||||||
|  |     PyArena_Free(arena); | ||||||
|  |     return (PyObject *)result; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | PyObject * | ||||||
|  | _Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds) | ||||||
|  | { | ||||||
|  |     static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL}; | ||||||
|  |     char *the_string; | ||||||
|  |     char *filename = "<string>"; | ||||||
|  |     char *mode_str = "exec"; | ||||||
|  |     int oldparser = 0; | ||||||
|  | 
 | ||||||
|  |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords, | ||||||
|  |             &the_string, &filename, &mode_str, &oldparser)) { | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     int mode = _mode_str_to_int(mode_str); | ||||||
|  |     if (mode == -1) { | ||||||
|  |         return PyErr_Format(PyExc_ValueError, "mode must be either 'exec' or 'eval' or 'single'"); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     PyCompilerFlags flags = _PyCompilerFlags_INIT; |     PyCompilerFlags flags = _PyCompilerFlags_INIT; | ||||||
|     flags.cf_flags = PyCF_IGNORE_COOKIE; |     flags.cf_flags = PyCF_IGNORE_COOKIE; | ||||||
| 
 | 
 | ||||||
|     mod_ty res; |     PyArena *arena = PyArena_New(); | ||||||
|     if (oldparser) { |     if (arena == NULL) { | ||||||
|         res = PyParser_ASTFromString(the_string, "<string>", mode, &flags, arena); |         return NULL; | ||||||
|     } |     } | ||||||
|     else { |  | ||||||
|         res = PyPegen_ASTFromString(the_string, "<string>", mode, &flags, arena); |  | ||||||
|     } |  | ||||||
|     if (res == NULL) { |  | ||||||
|         goto error; |  | ||||||
|     } |  | ||||||
|     result = PyAST_mod2obj(res); |  | ||||||
| 
 | 
 | ||||||
| error: |     mod_ty mod = _run_parser(the_string, filename, mode, &flags, arena, oldparser); | ||||||
|  |     if (mod == NULL) { | ||||||
|  |         PyArena_Free(arena); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     PyObject *result = PyAST_mod2obj(mod); | ||||||
|     PyArena_Free(arena); |     PyArena_Free(arena); | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static PyMethodDef ParseMethods[] = { | static PyMethodDef ParseMethods[] = { | ||||||
|     {"parse_file", (PyCFunction)(void (*)(void))_Py_parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."}, |     { | ||||||
|     {"parse_string", (PyCFunction)(void (*)(void))_Py_parse_string, METH_VARARGS|METH_KEYWORDS,"Parse a string."}, |         "parse_string", | ||||||
|  |         (PyCFunction)(void (*)(void))_Py_parse_string, | ||||||
|  |         METH_VARARGS|METH_KEYWORDS, | ||||||
|  |         "Parse a string, return an AST." | ||||||
|  |     }, | ||||||
|  |     { | ||||||
|  |         "compile_string", | ||||||
|  |         (PyCFunction)(void (*)(void))_Py_compile_string, | ||||||
|  |         METH_VARARGS|METH_KEYWORDS, | ||||||
|  |         "Compile a string, return a code object." | ||||||
|  |     }, | ||||||
|     {NULL, NULL, 0, NULL} /* Sentinel */ |     {NULL, NULL, 0, NULL} /* Sentinel */ | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -69,25 +69,22 @@ stats: peg_extension/parse.c data/xxl.py | ||||||
| 
 | 
 | ||||||
| time: time_compile | time: time_compile | ||||||
| 
 | 
 | ||||||
| time_compile: venv peg_extension/parse.c data/xxl.py | time_compile: venv data/xxl.py | ||||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile | 	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile | ||||||
| 
 | 
 | ||||||
| time_parse: venv peg_extension/parse.c data/xxl.py | time_parse: venv data/xxl.py | ||||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse | 	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse | ||||||
| 
 | 
 | ||||||
| time_check: venv peg_extension/parse.c data/xxl.py | time_old: time_old_compile | ||||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl check |  | ||||||
| 
 | 
 | ||||||
| time_stdlib: time_stdlib_compile | time_old_compile: venv data/xxl.py | ||||||
| 
 |  | ||||||
| time_stdlib_compile: venv peg_extension/parse.c data/xxl.py |  | ||||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile | 	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile | ||||||
| 
 | 
 | ||||||
| time_stdlib_parse: venv peg_extension/parse.c data/xxl.py | time_old_parse: venv data/xxl.py | ||||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse | 	$(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse | ||||||
| 
 | 
 | ||||||
| test_local: | time_peg_dir: venv | ||||||
| 	$(PYTHON) scripts/test_parse_directory.py \
 | 	$(VENVPYTHON) scripts/test_parse_directory.py \
 | ||||||
| 		--grammar-file $(GRAMMAR) \
 | 		--grammar-file $(GRAMMAR) \
 | ||||||
| 		--tokens-file $(TOKENS) \
 | 		--tokens-file $(TOKENS) \
 | ||||||
| 		-d $(TESTDIR) \
 | 		-d $(TESTDIR) \
 | ||||||
|  | @ -96,8 +93,8 @@ test_local: | ||||||
| 		--exclude "*/failset/**" \
 | 		--exclude "*/failset/**" \
 | ||||||
| 		--exclude "*/failset/**/*" | 		--exclude "*/failset/**/*" | ||||||
| 
 | 
 | ||||||
| test_global: $(CPYTHON) | time_stdlib: $(CPYTHON) venv | ||||||
| 	$(PYTHON) scripts/test_parse_directory.py \
 | 	$(VENVPYTHON) scripts/test_parse_directory.py \
 | ||||||
| 		--grammar-file $(GRAMMAR) \
 | 		--grammar-file $(GRAMMAR) \
 | ||||||
| 		--tokens-file $(TOKENS) \
 | 		--tokens-file $(TOKENS) \
 | ||||||
| 		-d $(CPYTHON) \
 | 		-d $(CPYTHON) \
 | ||||||
|  | @ -113,9 +110,6 @@ mypy: regen-metaparser | ||||||
| format-python: | format-python: | ||||||
| 	black pegen scripts | 	black pegen scripts | ||||||
| 
 | 
 | ||||||
| bench: venv |  | ||||||
| 	$(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check |  | ||||||
| 
 |  | ||||||
| format: format-python | format: format-python | ||||||
| 
 | 
 | ||||||
| find_max_nesting: | find_max_nesting: | ||||||
|  |  | ||||||
|  | @ -6,6 +6,8 @@ | ||||||
| import os | import os | ||||||
| from time import time | from time import time | ||||||
| 
 | 
 | ||||||
|  | import _peg_parser | ||||||
|  | 
 | ||||||
| try: | try: | ||||||
|     import memory_profiler |     import memory_profiler | ||||||
| except ModuleNotFoundError: | except ModuleNotFoundError: | ||||||
|  | @ -14,8 +16,6 @@ | ||||||
|     sys.exit(1) |     sys.exit(1) | ||||||
| 
 | 
 | ||||||
| sys.path.insert(0, os.getcwd()) | sys.path.insert(0, os.getcwd()) | ||||||
| from peg_extension import parse |  | ||||||
| from pegen.build import build_c_parser_and_generator |  | ||||||
| from scripts.test_parse_directory import parse_directory | from scripts.test_parse_directory import parse_directory | ||||||
| 
 | 
 | ||||||
| argparser = argparse.ArgumentParser( | argparser = argparse.ArgumentParser( | ||||||
|  | @ -41,9 +41,6 @@ | ||||||
|     "compile", help="Benchmark parsing and compiling to bytecode" |     "compile", help="Benchmark parsing and compiling to bytecode" | ||||||
| ) | ) | ||||||
| command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") | command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") | ||||||
| command_check = subcommands.add_parser( |  | ||||||
|     "check", help="Benchmark parsing and throwing the tree away" |  | ||||||
| ) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def benchmark(func): | def benchmark(func): | ||||||
|  | @ -66,22 +63,20 @@ def wrapper(*args): | ||||||
| @benchmark | @benchmark | ||||||
| def time_compile(source, parser): | def time_compile(source, parser): | ||||||
|     if parser == "cpython": |     if parser == "cpython": | ||||||
|         return compile(source, os.path.join("data", "xxl.py"), "exec") |         return _peg_parser.compile_string( | ||||||
|  |             source, | ||||||
|  |             oldparser=True, | ||||||
|  |         ) | ||||||
|     else: |     else: | ||||||
|         return parse.parse_string(source, mode=2) |         return _peg_parser.compile_string(source) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @benchmark | @benchmark | ||||||
| def time_parse(source, parser): | def time_parse(source, parser): | ||||||
|     if parser == "cpython": |     if parser == "cpython": | ||||||
|         return ast.parse(source, os.path.join("data", "xxl.py"), "exec") |         return _peg_parser.parse_string(source, oldparser=True) | ||||||
|     else: |     else: | ||||||
|         return parse.parse_string(source, mode=1) |         return _peg_parser.parse_string(source) | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| @benchmark |  | ||||||
| def time_check(source): |  | ||||||
|     return parse.parse_string(source, mode=0) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def run_benchmark_xxl(subcommand, parser, source): | def run_benchmark_xxl(subcommand, parser, source): | ||||||
|  | @ -89,32 +84,20 @@ def run_benchmark_xxl(subcommand, parser, source): | ||||||
|         time_compile(source, parser) |         time_compile(source, parser) | ||||||
|     elif subcommand == "parse": |     elif subcommand == "parse": | ||||||
|         time_parse(source, parser) |         time_parse(source, parser) | ||||||
|     elif subcommand == "check": |  | ||||||
|         time_check(source) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def run_benchmark_stdlib(subcommand, parser): | def run_benchmark_stdlib(subcommand, parser): | ||||||
|     modes = {"compile": 2, "parse": 1, "check": 0} |  | ||||||
|     extension = None |  | ||||||
|     if parser == "pegen": |  | ||||||
|         extension = build_c_parser_and_generator( |  | ||||||
|             "../../Grammar/python.gram", |  | ||||||
|             "../../Grammar/Tokens", |  | ||||||
|             "peg_extension/parse.c", |  | ||||||
|             compile_extension=True, |  | ||||||
|             skip_actions=False, |  | ||||||
|         ) |  | ||||||
|     for _ in range(3): |     for _ in range(3): | ||||||
|         parse_directory( |         parse_directory( | ||||||
|             "../../Lib", |             "../../Lib", | ||||||
|             "../../Grammar/python.gram", |             "../../Grammar/python.gram", | ||||||
|  |             "../../Grammar/Tokens", | ||||||
|             verbose=False, |             verbose=False, | ||||||
|             excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], |             excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], | ||||||
|             skip_actions=False, |             skip_actions=False, | ||||||
|             tree_arg=0, |             tree_arg=0, | ||||||
|             short=True, |             short=True, | ||||||
|             extension=extension, |             mode=2 if subcommand == "compile" else 1, | ||||||
|             mode=modes[subcommand], |  | ||||||
|             parser=parser, |             parser=parser, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  | @ -127,8 +110,6 @@ def main(): | ||||||
| 
 | 
 | ||||||
|     if subcommand is None: |     if subcommand is None: | ||||||
|         argparser.error("A benchmark to run is required") |         argparser.error("A benchmark to run is required") | ||||||
|     if subcommand == "check" and parser == "cpython": |  | ||||||
|         argparser.error("Cannot use check target with the CPython parser") |  | ||||||
| 
 | 
 | ||||||
|     if target == "xxl": |     if target == "xxl": | ||||||
|         with open(os.path.join("data", "xxl.py"), "r") as f: |         with open(os.path.join("data", "xxl.py"), "r") as f: | ||||||
|  |  | ||||||
|  | @ -30,6 +30,8 @@ | ||||||
| import sys | import sys | ||||||
| import tempfile | import tempfile | ||||||
| 
 | 
 | ||||||
|  | import _peg_parser | ||||||
|  | 
 | ||||||
| from typing import List | from typing import List | ||||||
| 
 | 
 | ||||||
| sys.path.insert(0, os.getcwd()) | sys.path.insert(0, os.getcwd()) | ||||||
|  | @ -72,7 +74,7 @@ def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]: | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def show_parse(source: str, verbose: bool = False) -> str: | def show_parse(source: str, verbose: bool = False) -> str: | ||||||
|     tree = ast.parse(source) |     tree = _peg_parser.parse_string(source, oldparser=True) | ||||||
|     return format_tree(tree, verbose).rstrip("\n") |     return format_tree(tree, verbose).rstrip("\n") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -90,17 +92,11 @@ def main() -> None: | ||||||
|         sep = " " |         sep = " " | ||||||
|     program = sep.join(args.program) |     program = sep.join(args.program) | ||||||
|     if args.grammar_file: |     if args.grammar_file: | ||||||
|         sys.path.insert(0, os.curdir) |         tree = _peg_parser.parse_string(program) | ||||||
|         from pegen.build import build_parser_and_generator |  | ||||||
| 
 |  | ||||||
|         build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True) |  | ||||||
|         from pegen.parse import parse_string  # type: ignore[import] |  | ||||||
| 
 |  | ||||||
|         tree = parse_string(program, mode=1) |  | ||||||
| 
 | 
 | ||||||
|         if args.diff: |         if args.diff: | ||||||
|             a = tree |             a = tree | ||||||
|             b = ast.parse(program) |             b = _peg_parser.parse_string(program, oldparser=True) | ||||||
|             diff = diff_trees(a, b, args.verbose) |             diff = diff_trees(a, b, args.verbose) | ||||||
|             if diff: |             if diff: | ||||||
|                 for line in diff: |                 for line in diff: | ||||||
|  | @ -111,8 +107,8 @@ def main() -> None: | ||||||
|             print(f"# Parsed using {args.grammar_file}") |             print(f"# Parsed using {args.grammar_file}") | ||||||
|             print(format_tree(tree, args.verbose)) |             print(format_tree(tree, args.verbose)) | ||||||
|     else: |     else: | ||||||
|         tree = ast.parse(program) |         tree = _peg_parser.parse_string(program, oldparser=True) | ||||||
|         print("# Parse using ast.parse()") |         print("# Parse using the old parser") | ||||||
|         print(format_tree(tree, args.verbose)) |         print(format_tree(tree, args.verbose)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -6,13 +6,14 @@ | ||||||
| import sys | import sys | ||||||
| import time | import time | ||||||
| import traceback | import traceback | ||||||
|  | import tokenize | ||||||
|  | import _peg_parser | ||||||
| from glob import glob | from glob import glob | ||||||
| from pathlib import PurePath | from pathlib import PurePath | ||||||
| 
 | 
 | ||||||
| from typing import List, Optional, Any | from typing import List, Optional, Any | ||||||
| 
 | 
 | ||||||
| sys.path.insert(0, os.getcwd()) | sys.path.insert(0, os.getcwd()) | ||||||
| from pegen.build import build_c_parser_and_generator |  | ||||||
| from pegen.ast_dump import ast_dump | from pegen.ast_dump import ast_dump | ||||||
| from pegen.testutil import print_memstats | from pegen.testutil import print_memstats | ||||||
| from scripts import show_parse | from scripts import show_parse | ||||||
|  | @ -83,7 +84,7 @@ def compare_trees( | ||||||
|     actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False, |     actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False, | ||||||
| ) -> int: | ) -> int: | ||||||
|     with open(file) as f: |     with open(file) as f: | ||||||
|         expected_tree = ast.parse(f.read()) |         expected_tree = _peg_parser.parse_string(f.read(), oldparser=True) | ||||||
| 
 | 
 | ||||||
|     expected_text = ast_dump(expected_tree, include_attributes=include_attributes) |     expected_text = ast_dump(expected_tree, include_attributes=include_attributes) | ||||||
|     actual_text = ast_dump(actual_tree, include_attributes=include_attributes) |     actual_text = ast_dump(actual_tree, include_attributes=include_attributes) | ||||||
|  | @ -121,7 +122,6 @@ def parse_directory( | ||||||
|     skip_actions: bool, |     skip_actions: bool, | ||||||
|     tree_arg: int, |     tree_arg: int, | ||||||
|     short: bool, |     short: bool, | ||||||
|     extension: Any, |  | ||||||
|     mode: int, |     mode: int, | ||||||
|     parser: str, |     parser: str, | ||||||
| ) -> int: | ) -> int: | ||||||
|  | @ -137,47 +137,21 @@ def parse_directory( | ||||||
|         if not os.path.exists(grammar_file): |         if not os.path.exists(grammar_file): | ||||||
|             print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr) |             print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr) | ||||||
|             return 1 |             return 1 | ||||||
| 
 |  | ||||||
|         try: |  | ||||||
|             if not extension and parser == "pegen": |  | ||||||
|                 build_c_parser_and_generator( |  | ||||||
|                     grammar_file, |  | ||||||
|                     tokens_file, |  | ||||||
|                     "peg_extension/parse.c", |  | ||||||
|                     compile_extension=True, |  | ||||||
|                     skip_actions=skip_actions, |  | ||||||
|                 ) |  | ||||||
|         except Exception as err: |  | ||||||
|             print( |  | ||||||
|                 f"{FAIL}The following error occurred when generating the parser. Please check your grammar file.\n{ENDC}", |  | ||||||
|                 file=sys.stderr, |  | ||||||
|             ) |  | ||||||
|             traceback.print_exception(err.__class__, err, None) |  | ||||||
| 
 |  | ||||||
|             return 1 |  | ||||||
| 
 |  | ||||||
|     else: |     else: | ||||||
|         print( |         print( | ||||||
|             "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n" |             "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n" | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     if parser == "pegen": |     if tree_arg: | ||||||
|         try: |         assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees" | ||||||
|             from peg_extension import parse  # type: ignore |  | ||||||
|         except Exception as e: |  | ||||||
|             print( |  | ||||||
|                 "An existing parser was not found. Please run `make` or specify a grammar file with the `-g` flag.", |  | ||||||
|                 file=sys.stderr, |  | ||||||
|             ) |  | ||||||
|             return 1 |  | ||||||
| 
 | 
 | ||||||
|     # For a given directory, traverse files and attempt to parse each one |     # For a given directory, traverse files and attempt to parse each one | ||||||
|     # - Output success/failure for each file |     # - Output success/failure for each file | ||||||
|     errors = 0 |     errors = 0 | ||||||
|     files = [] |     files = [] | ||||||
|     trees = {}  # Trees to compare (after everything else is done) |     trees = {}  # Trees to compare (after everything else is done) | ||||||
|  |     total_seconds = 0 | ||||||
| 
 | 
 | ||||||
|     t0 = time.time() |  | ||||||
|     for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): |     for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): | ||||||
|         # Only attempt to parse Python files and files that are not excluded |         # Only attempt to parse Python files and files that are not excluded | ||||||
|         should_exclude_file = False |         should_exclude_file = False | ||||||
|  | @ -187,25 +161,31 @@ def parse_directory( | ||||||
|                 break |                 break | ||||||
| 
 | 
 | ||||||
|         if not should_exclude_file: |         if not should_exclude_file: | ||||||
|             try: |             with tokenize.open(file) as f: | ||||||
|                 if tree_arg: |  | ||||||
|                     mode = 1 |  | ||||||
|                 if parser == "cpython": |  | ||||||
|                     with open(file, "r") as f: |  | ||||||
|                 source = f.read() |                 source = f.read() | ||||||
|  |             try: | ||||||
|  |                 t0 = time.time() | ||||||
|                 if mode == 2: |                 if mode == 2: | ||||||
|                             compile(source, file, "exec") |                     result = _peg_parser.compile_string( | ||||||
|                         elif mode == 1: |                         source, | ||||||
|                             ast.parse(source, file, "exec") |                         filename=file, | ||||||
|  |                         oldparser=parser == "cpython", | ||||||
|  |                     ) | ||||||
|                 else: |                 else: | ||||||
|                     tree = parse.parse_file(file, mode=mode) |                     result = _peg_parser.parse_string( | ||||||
|  |                         source, | ||||||
|  |                         filename=file, | ||||||
|  |                         oldparser=parser == "cpython" | ||||||
|  |                     ) | ||||||
|  |                 t1 = time.time() | ||||||
|  |                 total_seconds += (t1 - t0) | ||||||
|                 if tree_arg: |                 if tree_arg: | ||||||
|                     trees[file] = tree |                     trees[file] = result | ||||||
|                 if not short: |                 if not short: | ||||||
|                     report_status(succeeded=True, file=file, verbose=verbose) |                     report_status(succeeded=True, file=file, verbose=verbose) | ||||||
|             except Exception as error: |             except Exception as error: | ||||||
|                 try: |                 try: | ||||||
|                     ast.parse(file) |                     _peg_parser.parse_string(source, mode="exec", oldparser=True) | ||||||
|                 except Exception: |                 except Exception: | ||||||
|                     if not short: |                     if not short: | ||||||
|                         print(f"File {file} cannot be parsed by either pegen or the ast module.") |                         print(f"File {file} cannot be parsed by either pegen or the ast module.") | ||||||
|  | @ -217,7 +197,6 @@ def parse_directory( | ||||||
|             files.append(file) |             files.append(file) | ||||||
|     t1 = time.time() |     t1 = time.time() | ||||||
| 
 | 
 | ||||||
|     total_seconds = t1 - t0 |  | ||||||
|     total_files = len(files) |     total_files = len(files) | ||||||
| 
 | 
 | ||||||
|     total_bytes = 0 |     total_bytes = 0 | ||||||
|  | @ -238,13 +217,6 @@ def parse_directory( | ||||||
|             f"or {total_bytes / total_seconds :,.0f} bytes/sec.", |             f"or {total_bytes / total_seconds :,.0f} bytes/sec.", | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     if parser == "pegen": |  | ||||||
|         # Dump memo stats to @data. |  | ||||||
|         with open("@data", "w") as datafile: |  | ||||||
|             for i, count in enumerate(parse.get_memo_stats()): |  | ||||||
|                 if count: |  | ||||||
|                     datafile.write(f"{i:4d} {count:9d}\n") |  | ||||||
| 
 |  | ||||||
|     if short: |     if short: | ||||||
|         print_memstats() |         print_memstats() | ||||||
| 
 | 
 | ||||||
|  | @ -275,6 +247,7 @@ def main() -> None: | ||||||
|     skip_actions = args.skip_actions |     skip_actions = args.skip_actions | ||||||
|     tree = args.tree |     tree = args.tree | ||||||
|     short = args.short |     short = args.short | ||||||
|  |     mode = 1 if args.tree else 2 | ||||||
|     sys.exit( |     sys.exit( | ||||||
|         parse_directory( |         parse_directory( | ||||||
|             directory, |             directory, | ||||||
|  | @ -285,8 +258,7 @@ def main() -> None: | ||||||
|             skip_actions, |             skip_actions, | ||||||
|             tree, |             tree, | ||||||
|             short, |             short, | ||||||
|             None, |             mode, | ||||||
|             0, |  | ||||||
|             "pegen", |             "pegen", | ||||||
|         ) |         ) | ||||||
|     ) |     ) | ||||||
|  |  | ||||||
|  | @ -54,7 +54,7 @@ def find_dirname(package_name: str) -> str: | ||||||
|     assert False  # This is to fix mypy, should never be reached |     assert False  # This is to fix mypy, should never be reached | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def run_tests(dirname: str, tree: int, extension: Any) -> int: | def run_tests(dirname: str, tree: int) -> int: | ||||||
|     return test_parse_directory.parse_directory( |     return test_parse_directory.parse_directory( | ||||||
|         dirname, |         dirname, | ||||||
|         HERE / ".." / ".." / ".." / "Grammar" / "python.gram", |         HERE / ".." / ".." / ".." / "Grammar" / "python.gram", | ||||||
|  | @ -72,7 +72,6 @@ def run_tests(dirname: str, tree: int, extension: Any) -> int: | ||||||
|         skip_actions=False, |         skip_actions=False, | ||||||
|         tree_arg=tree, |         tree_arg=tree, | ||||||
|         short=True, |         short=True, | ||||||
|         extension=extension, |  | ||||||
|         mode=1, |         mode=1, | ||||||
|         parser="pegen", |         parser="pegen", | ||||||
|     ) |     ) | ||||||
|  | @ -82,13 +81,6 @@ def main() -> None: | ||||||
|     args = argparser.parse_args() |     args = argparser.parse_args() | ||||||
|     tree = args.tree |     tree = args.tree | ||||||
| 
 | 
 | ||||||
|     extension = build.build_c_parser_and_generator( |  | ||||||
|         HERE / ".." / ".." / ".." / "Grammar" / "python.gram", |  | ||||||
|         HERE / ".." / ".." / ".." / "Grammar" / "Tokens", |  | ||||||
|         "peg_extension/parse.c", |  | ||||||
|         compile_extension=True, |  | ||||||
|     ) |  | ||||||
| 
 |  | ||||||
|     for package in get_packages(): |     for package in get_packages(): | ||||||
|         print(f"Extracting files from {package}... ", end="") |         print(f"Extracting files from {package}... ", end="") | ||||||
|         try: |         try: | ||||||
|  | @ -100,7 +92,7 @@ def main() -> None: | ||||||
| 
 | 
 | ||||||
|         print(f"Trying to parse all python files ... ") |         print(f"Trying to parse all python files ... ") | ||||||
|         dirname = find_dirname(package) |         dirname = find_dirname(package) | ||||||
|         status = run_tests(dirname, tree, extension) |         status = run_tests(dirname, tree) | ||||||
|         if status == 0: |         if status == 0: | ||||||
|             shutil.rmtree(dirname) |             shutil.rmtree(dirname) | ||||||
|         else: |         else: | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Miss Islington (bot)
						Miss Islington (bot)