cpython/Tools/peg_generator/scripts/test_parse_directory.py
Lysandros Nikolaou 9645930b5b
bpo-40688: Use the correct parser in the peg_generator scripts (GH-20235)
The scripts in `Tools/peg_generator/scripts` mostly assume that
`ast.parse` and `compile` use the old parser, since that was the
state of things while we were developing them. They need to be
updated to always use the correct parser. `_peg_parser` is being
extended to support both parsing and compiling with both parsers.
2020-05-25 20:51:58 +01:00

268 lines
7.8 KiB
Python
Executable file

#!/usr/bin/env python3.8
import argparse
import ast
import os
import sys
import time
import traceback
import tokenize
import _peg_parser
from glob import glob
from pathlib import PurePath
from typing import List, Optional, Any
sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
from pegen.testutil import print_memstats
from scripts import show_parse
# ANSI escape sequences used by report_status() to colour per-file results.
SUCCESS = "\033[92m"  # bright green foreground
FAIL = "\033[91m"  # bright red foreground
ENDC = "\033[0m"  # reset all terminal attributes
# Command-line interface of the script; the parsed options are consumed by main().
argparser = argparse.ArgumentParser(
    prog="test_parse_directory",
    description="Helper program to test directories or files for pegen",
)

# Where to look for files and which grammar/tokens files were supplied.
argparser.add_argument(
    "-d",
    "--directory",
    help="Directory path containing files to test",
)
argparser.add_argument(
    "--grammar-file",
    help="Grammar file path",
)
argparser.add_argument(
    "--tokens-file",
    help="Tokens file path",
)

# May be given several times; every occurrence appends one more glob.
argparser.add_argument(
    "-e",
    "--exclude",
    action="append",
    default=[],
    help="Glob(s) for matching files to exclude",
)

# Output verbosity switches.
argparser.add_argument(
    "-s",
    "--short",
    action="store_true",
    help="Only show errors, in a more Emacs-friendly format",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="Display detailed errors for failures",
)

argparser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

# Counted flag: the number of occurrences (-t, -tt, ...) is stored as an int.
argparser.add_argument(
    "-t",
    "--tree",
    action="count",
    default=0,
    help="Compare parse tree to official AST",
)
def report_status(
    succeeded: bool,
    file: str,
    verbose: bool,
    error: Optional[Exception] = None,
    short: bool = False,
) -> None:
    """Print the outcome of processing a single file.

    Args:
        succeeded: Whether the file was processed without error.
        file: Path of the file the report is about.
        verbose: In the coloured (non-short) format, also print the error's
            class name and message when an error is given.
        error: The exception that caused the failure, if any.
        short: Emit ``file:line:offset: message`` lines for failures only;
            successes produce no output at all in this format.
    """
    if short:
        if succeeded:
            return
        if isinstance(error, SyntaxError):
            # Fall back to 1 when the exception carries no position.
            lineno = error.lineno or 1
            offset = error.offset or 1
            message = error.args[0]
        else:
            lineno = offset = 0
            message = f"{error.__class__.__name__}: {error}"
        print(f"{file}:{lineno}:{offset}: {message}")
        return

    if succeeded is True:
        status, color = "OK", SUCCESS
    else:
        status, color = "Fail", FAIL
    print(f"{color}{file:60} {status}{ENDC}")

    if error and verbose:
        print(f" {str(error.__class__.__name__)}: {error}")
def compare_trees(
    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
) -> int:
    """Compare *actual_tree* with the tree the old parser produces for *file*.

    The file is re-read and parsed with ``oldparser=True`` to obtain the
    expected tree; both trees are then dumped with ``ast_dump`` and the dumps
    are compared as text.

    Args:
        actual_tree: The tree to check.
        file: Path of the source file *actual_tree* was built from.
        verbose: Also print the formatted trees (the single tree on a match,
            expected and actual on a mismatch).
        include_attributes: Forwarded to ``ast_dump`` and the ``show_parse``
            helpers.

    Returns:
        0 if the two dumps are identical, 1 otherwise (after printing a diff).
    """
    # Read the source the same way parse_directory does, so that the encoding
    # declared in the file is used instead of the locale's default encoding.
    with tokenize.open(file) as f:
        expected_tree = _peg_parser.parse_string(f.read(), oldparser=True)

    expected_text = ast_dump(expected_tree, include_attributes=include_attributes)
    actual_text = ast_dump(actual_tree, include_attributes=include_attributes)
    if actual_text == expected_text:
        if verbose:
            print(f"Tree for {file}:")
            print(show_parse.format_tree(actual_tree, include_attributes))
        return 0

    print(f"Diffing ASTs for {file} ...")

    if verbose:
        # The formatted trees are only needed for verbose output.
        expected = show_parse.format_tree(expected_tree, include_attributes)
        actual = show_parse.format_tree(actual_tree, include_attributes)
        print(f"Expected for {file}:")
        print(expected)
        print(f"Actual for {file}:")
        print(actual)
        print(f"Diff for {file}:")

    for line in show_parse.diff_trees(expected_tree, actual_tree, include_attributes):
        print(line)

    return 1
def parse_directory(
    directory: str,
    grammar_file: str,
    tokens_file: str,
    verbose: bool,
    excluded_files: List[str],
    skip_actions: bool,
    tree_arg: int,
    short: bool,
    mode: int,
    parser: str,
) -> int:
    """Parse or compile every ``*.py`` file under *directory* and report the results.

    Args:
        directory: Root directory, searched recursively for ``*.py`` files.
        grammar_file: Grammar file path. Only its existence is checked here,
            and only when *tokens_file* is given as well.
        tokens_file: Tokens file path. Only tested for being non-empty here.
        verbose: Forwarded to ``report_status`` and ``compare_trees``.
        excluded_files: Patterns tested with ``PurePath.match``; files
            matching any of them are skipped.
        skip_actions: Not used by this function; kept so callers keep working.
        tree_arg: If non-zero, collect the resulting trees and compare them
            with the old parser's trees afterwards; a value of 2 or more also
            compares node attributes.
        short: Only report failures (see ``report_status``) and print memory
            statistics at the end.
        mode: 2 compiles each file with ``compile_string``; any other value
            parses it with ``parse_string``.
        parser: ``"cpython"`` selects the old parser (``oldparser=True``);
            any other value selects the new one.

    Returns:
        1 if the arguments are invalid, if any file failed although the old
        parser accepts it, or if any tree comparison found a difference;
        0 otherwise.
    """
    if parser == "cpython" and (tree_arg or mode == 0):
        print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
        return 1

    if not directory:
        print("You must specify a directory of files to test.", file=sys.stderr)
        return 1

    if grammar_file and tokens_file:
        if not os.path.exists(grammar_file):
            print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
            return 1
    else:
        print(
            "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
        )

    if tree_arg:
        assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"

    # For a given directory, traverse files and attempt to parse each one
    # - Output success/failure for each file
    errors = 0
    files = []
    trees = {}  # Trees to compare (after everything else is done)
    total_seconds = 0.0
    use_old_parser = parser == "cpython"

    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
        # Only attempt to parse Python files that are not excluded
        if any(PurePath(file).match(pattern) for pattern in excluded_files):
            continue

        with tokenize.open(file) as f:
            source = f.read()

        try:
            # perf_counter() is monotonic, so the measured interval cannot be
            # distorted by system clock adjustments.
            t0 = time.perf_counter()
            if mode == 2:
                result = _peg_parser.compile_string(
                    source, filename=file, oldparser=use_old_parser,
                )
            else:
                result = _peg_parser.parse_string(
                    source, filename=file, oldparser=use_old_parser,
                )
            total_seconds += time.perf_counter() - t0
            if tree_arg:
                trees[file] = result
            if not short:
                report_status(succeeded=True, file=file, verbose=verbose)
        except Exception as error:
            # Only count the file as a failure if the old parser accepts it;
            # a file rejected by both parsers is merely mentioned.
            try:
                _peg_parser.parse_string(source, mode="exec", oldparser=True)
            except Exception:
                if not short:
                    print(f"File {file} cannot be parsed by either pegen or the ast module.")
            else:
                report_status(
                    succeeded=False, file=file, verbose=verbose, error=error, short=short
                )
                errors += 1
        files.append(file)

    total_files = len(files)

    total_bytes = 0
    total_lines = 0
    for file in files:
        # Count lines and bytes separately
        with open(file, "rb") as f:
            total_lines += sum(1 for _ in f)
            # The file has been read to the end, so tell() is its size in bytes.
            total_bytes += f.tell()

    print(
        f"Checked {total_files:,} files, {total_lines:,} lines,",
        f"{total_bytes:,} bytes in {total_seconds:,.3f} seconds.",
    )
    if total_seconds > 0:
        print(
            f"That's {total_lines / total_seconds :,.0f} lines/sec,",
            f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
        )

    if short:
        print_memstats()

    if errors:
        print(f"Encountered {errors} failures.", file=sys.stderr)

    # Compare trees (the dict is empty unless -t is given)
    compare_trees_errors = 0
    for file, tree in trees.items():
        if not short:
            print("Comparing ASTs for", file)
        if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
            compare_trees_errors += 1

    if errors or compare_trees_errors:
        return 1

    return 0
def main() -> None:
    """Run parse_directory with the command-line options and exit with its result."""
    args = argparser.parse_args()

    # Trees are only compared in parse mode (1); without -t the files are
    # compiled instead (2).
    mode = 1 if args.tree else 2

    status = parse_directory(
        args.directory,
        args.grammar_file,
        args.tokens_file,
        args.verbose,
        args.exclude,
        args.skip_actions,
        args.tree,
        args.short,
        mode,
        "pegen",
    )
    sys.exit(status)


if __name__ == "__main__":
    main()