| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | #!/usr/bin/env python3.8 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """pegen -- PEG Generator.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Search the web for PEG Parsers for reference. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import argparse | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import time | 
					
						
							|  |  |  | import token | 
					
						
							|  |  |  | import traceback | 
					
						
							| 
									
										
										
										
											2020-04-28 13:11:55 +01:00
										 |  |  | from typing import Tuple | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-29 12:23:22 +01:00
										 |  |  | from pegen.grammar import Grammar | 
					
						
							|  |  |  | from pegen.parser import Parser | 
					
						
							|  |  |  | from pegen.parser_generator import ParserGenerator | 
					
						
							|  |  |  | from pegen.tokenizer import Tokenizer | 
					
						
							| 
									
										
										
										
											2020-12-26 19:11:29 +00:00
										 |  |  | from pegen.validator import validate_grammar | 
					
						
							| 
									
										
										
										
											2020-04-28 13:11:55 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def generate_c_code( | 
					
						
							|  |  |  |     args: argparse.Namespace, | 
					
						
							|  |  |  | ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: | 
					
						
							|  |  |  |     from pegen.build import build_c_parser_and_generator | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     verbose = args.verbose | 
					
						
							|  |  |  |     verbose_tokenizer = verbose >= 3 | 
					
						
							|  |  |  |     verbose_parser = verbose == 2 or verbose >= 4 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         grammar, parser, tokenizer, gen = build_c_parser_and_generator( | 
					
						
							|  |  |  |             args.grammar_filename, | 
					
						
							|  |  |  |             args.tokens_filename, | 
					
						
							|  |  |  |             args.output, | 
					
						
							|  |  |  |             args.compile_extension, | 
					
						
							|  |  |  |             verbose_tokenizer, | 
					
						
							|  |  |  |             verbose_parser, | 
					
						
							|  |  |  |             args.verbose, | 
					
						
							|  |  |  |             keep_asserts_in_extension=False if args.optimized else True, | 
					
						
							|  |  |  |             skip_actions=args.skip_actions, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         return grammar, parser, tokenizer, gen | 
					
						
							|  |  |  |     except Exception as err: | 
					
						
							|  |  |  |         if args.verbose: | 
					
						
							|  |  |  |             raise  # Show traceback | 
					
						
							|  |  |  |         traceback.print_exception(err.__class__, err, None) | 
					
						
							|  |  |  |         sys.stderr.write("For full traceback, use -v\n") | 
					
						
							|  |  |  |         sys.exit(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def generate_python_code( | 
					
						
							|  |  |  |     args: argparse.Namespace, | 
					
						
							|  |  |  | ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]: | 
					
						
							|  |  |  |     from pegen.build import build_python_parser_and_generator | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     verbose = args.verbose | 
					
						
							|  |  |  |     verbose_tokenizer = verbose >= 3 | 
					
						
							|  |  |  |     verbose_parser = verbose == 2 or verbose >= 4 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         grammar, parser, tokenizer, gen = build_python_parser_and_generator( | 
					
						
							|  |  |  |             args.grammar_filename, | 
					
						
							|  |  |  |             args.output, | 
					
						
							|  |  |  |             verbose_tokenizer, | 
					
						
							|  |  |  |             verbose_parser, | 
					
						
							|  |  |  |             skip_actions=args.skip_actions, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         return grammar, parser, tokenizer, gen | 
					
						
							|  |  |  |     except Exception as err: | 
					
						
							|  |  |  |         if args.verbose: | 
					
						
							|  |  |  |             raise  # Show traceback | 
					
						
							|  |  |  |         traceback.print_exception(err.__class__, err, None) | 
					
						
							|  |  |  |         sys.stderr.write("For full traceback, use -v\n") | 
					
						
							|  |  |  |         sys.exit(1) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
# Command-line interface: `pegen [-q] [-v ...] {c,python} GRAMMAR ...`.
argparser = argparse.ArgumentParser(
    prog="pegen", description="Experimental PEG-like parser generator"
)
argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
argparser.add_argument(
    "-v",
    "--verbose",
    # "count" lets -v repeat (-vv, -vvv, ...) for progressively more output.
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)
# One subcommand per target language; each binds `func` to its generator,
# which main() dispatches through.
subparsers = argparser.add_subparsers(help="target language for the generated code")

c_parser = subparsers.add_parser("c", help="Generate C code for inclusion into CPython")
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.c", help="Where to write the generated parser"
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    # When set, generate_c_code drops asserts from the compiled extension.
    "--optimized", action="store_true", help="Compile the extension in optimized mode"
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

python_parser = subparsers.add_parser(
    "python",
    help="Generate Python code, needs grammar definition with Python actions",
)
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)
python_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main() -> None: | 
					
						
							| 
									
										
										
										
											2020-04-24 00:53:29 +01:00
										 |  |  |     from pegen.testutil import print_memstats | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     args = argparser.parse_args() | 
					
						
							| 
									
										
										
										
											2020-04-28 13:11:55 +01:00
										 |  |  |     if "func" not in args: | 
					
						
							|  |  |  |         argparser.error("Must specify the target language mode ('c' or 'python')") | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-28 13:11:55 +01:00
										 |  |  |     t0 = time.time() | 
					
						
							|  |  |  |     grammar, parser, tokenizer, gen = args.func(args) | 
					
						
							|  |  |  |     t1 = time.time() | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-26 19:11:29 +00:00
										 |  |  |     validate_grammar(grammar) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     if not args.quiet: | 
					
						
							|  |  |  |         if args.verbose: | 
					
						
							|  |  |  |             print("Raw Grammar:") | 
					
						
							|  |  |  |             for line in repr(grammar).splitlines(): | 
					
						
							|  |  |  |                 print(" ", line) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         print("Clean Grammar:") | 
					
						
							|  |  |  |         for line in str(grammar).splitlines(): | 
					
						
							|  |  |  |             print(" ", line) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if args.verbose: | 
					
						
							|  |  |  |         print("First Graph:") | 
					
						
							|  |  |  |         for src, dsts in gen.first_graph.items(): | 
					
						
							|  |  |  |             print(f"  {src} -> {', '.join(dsts)}") | 
					
						
							|  |  |  |         print("First SCCS:") | 
					
						
							|  |  |  |         for scc in gen.first_sccs: | 
					
						
							|  |  |  |             print(" ", scc, end="") | 
					
						
							|  |  |  |             if len(scc) > 1: | 
					
						
							|  |  |  |                 print( | 
					
						
							|  |  |  |                     "  # Indirectly left-recursive; leaders:", | 
					
						
							|  |  |  |                     {name for name in scc if grammar.rules[name].leader}, | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 name = next(iter(scc)) | 
					
						
							|  |  |  |                 if name in gen.first_graph[name]: | 
					
						
							|  |  |  |                     print("  # Left-recursive") | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     print() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if args.verbose: | 
					
						
							|  |  |  |         dt = t1 - t0 | 
					
						
							|  |  |  |         diag = tokenizer.diagnose() | 
					
						
							|  |  |  |         nlines = diag.end[0] | 
					
						
							|  |  |  |         if diag.type == token.ENDMARKER: | 
					
						
							|  |  |  |             nlines -= 1 | 
					
						
							|  |  |  |         print(f"Total time: {dt:.3f} sec; {nlines} lines", end="") | 
					
						
							|  |  |  |         if dt: | 
					
						
							|  |  |  |             print(f"; {nlines / dt:.0f} lines/sec") | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             print() | 
					
						
							|  |  |  |         print("Caches sizes:") | 
					
						
							|  |  |  |         print(f"  token array : {len(tokenizer._tokens):10}") | 
					
						
							|  |  |  |         print(f"        cache : {len(parser._cache):10}") | 
					
						
							|  |  |  |         if not print_memstats(): | 
					
						
							|  |  |  |             print("(Can't find psutil; install it for memory stats.)") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							| 
									
										
										
										
											2020-04-24 00:53:29 +01:00
										 |  |  |     if sys.version_info < (3, 8): | 
					
						
							|  |  |  |         print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr) | 
					
						
							|  |  |  |         sys.exit(1) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     main() |