| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | import importlib.util | 
					
						
							|  |  |  | import io | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | import pathlib | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import textwrap | 
					
						
							| 
									
										
										
										
											2020-04-28 13:11:55 +01:00
										 |  |  | import token | 
					
						
							| 
									
										
										
										
											2021-09-05 14:58:52 +01:00
										 |  |  | import tokenize | 
					
						
							| 
									
										
										
										
											2022-04-06 15:55:58 -06:00
										 |  |  | from typing import IO, Any, Dict, Final, Optional, Type, cast | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from pegen.build import compile_c_extension | 
					
						
							|  |  |  | from pegen.c_generator import CParserGenerator | 
					
						
							|  |  |  | from pegen.grammar import Grammar | 
					
						
							|  |  |  | from pegen.grammar_parser import GeneratedParser as GrammarParser | 
					
						
							|  |  |  | from pegen.parser import Parser | 
					
						
							|  |  |  | from pegen.python_generator import PythonParserGenerator | 
					
						
							|  |  |  | from pegen.tokenizer import Tokenizer | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  | ALL_TOKENS = token.tok_name | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  | EXACT_TOKENS = token.EXACT_TOKEN_TYPES | 
					
						
							| 
									
										
										
										
											2020-04-28 13:11:55 +01:00
										 |  |  | NON_EXACT_TOKENS = { | 
					
						
							|  |  |  |     name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values() | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def generate_parser(grammar: Grammar) -> Type[Parser]: | 
					
						
							|  |  |  |     # Generate a parser. | 
					
						
							|  |  |  |     out = io.StringIO() | 
					
						
							|  |  |  |     genr = PythonParserGenerator(grammar, out) | 
					
						
							|  |  |  |     genr.generate("<string>") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Load the generated parser class. | 
					
						
							|  |  |  |     ns: Dict[str, Any] = {} | 
					
						
							|  |  |  |     exec(out.getvalue(), ns) | 
					
						
							|  |  |  |     return ns["GeneratedParser"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any: | 
					
						
							|  |  |  |     # Run a parser on a file (stream). | 
					
						
							| 
									
										
										
										
											2023-08-29 12:23:22 +01:00
										 |  |  |     tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore[arg-type] # typeshed issue #3515 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     parser = parser_class(tokenizer, verbose=verbose) | 
					
						
							|  |  |  |     result = parser.start() | 
					
						
							|  |  |  |     if result is None: | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         raise parser.make_syntax_error("invalid syntax") | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     return result | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_string( | 
					
						
							|  |  |  |     source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False | 
					
						
							|  |  |  | ) -> Any: | 
					
						
							|  |  |  |     # Run the parser on a string. | 
					
						
							|  |  |  |     if dedent: | 
					
						
							|  |  |  |         source = textwrap.dedent(source) | 
					
						
							|  |  |  |     file = io.StringIO(source) | 
					
						
							| 
									
										
										
										
											2023-08-29 12:23:22 +01:00
										 |  |  |     return run_parser(file, parser_class, verbose=verbose)  # type: ignore[arg-type] # typeshed issue #3515 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def make_parser(source: str) -> Type[Parser]: | 
					
						
							|  |  |  |     # Combine parse_string() and generate_parser(). | 
					
						
							|  |  |  |     grammar = parse_string(source, GrammarParser) | 
					
						
							|  |  |  |     return generate_parser(grammar) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def import_file(full_name: str, path: str) -> Any: | 
					
						
							|  |  |  |     """Import a python module from a path""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     spec = importlib.util.spec_from_file_location(full_name, path) | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |     assert spec is not None | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     mod = importlib.util.module_from_spec(spec) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # We assume this is not None and has an exec_module() method. | 
					
						
							|  |  |  |     # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading | 
					
						
							|  |  |  |     loader = cast(Any, spec.loader) | 
					
						
							|  |  |  |     loader.exec_module(mod) | 
					
						
							|  |  |  |     return mod | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def generate_c_parser_source(grammar: Grammar) -> str: | 
					
						
							|  |  |  |     out = io.StringIO() | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |     genr = CParserGenerator(grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, out) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     genr.generate("<string>") | 
					
						
							|  |  |  |     return out.getvalue() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def generate_parser_c_extension( | 
					
						
							| 
									
										
										
										
											2023-06-01 17:24:15 +02:00
										 |  |  |     grammar: Grammar, | 
					
						
							|  |  |  |     path: pathlib.PurePath, | 
					
						
							|  |  |  |     debug: bool = False, | 
					
						
							| 
									
										
										
										
											2022-04-06 15:55:58 -06:00
										 |  |  |     library_dir: Optional[str] = None, | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | ) -> Any: | 
					
						
							|  |  |  |     """Generate a parser c extension for the given grammar in the given path
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Returns a module object with a parse_string() method. | 
					
						
							|  |  |  |     TODO: express that using a Protocol. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     # Make sure that the working directory is empty: reusing non-empty temporary | 
					
						
							|  |  |  |     # directories when generating extensions can lead to segmentation faults. | 
					
						
							|  |  |  |     # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more | 
					
						
							|  |  |  |     # context. | 
					
						
							|  |  |  |     assert not os.listdir(path) | 
					
						
							|  |  |  |     source = path / "parse.c" | 
					
						
							| 
									
										
										
										
											2020-05-25 18:38:45 +01:00
										 |  |  |     with open(source, "w", encoding="utf-8") as file: | 
					
						
							| 
									
										
										
										
											2020-05-01 23:14:12 +01:00
										 |  |  |         genr = CParserGenerator( | 
					
						
							|  |  |  |             grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |         genr.generate("parse.c") | 
					
						
							| 
									
										
										
										
											2022-04-06 15:55:58 -06:00
										 |  |  |     compile_c_extension( | 
					
						
							|  |  |  |         str(source), | 
					
						
							|  |  |  |         build_dir=str(path), | 
					
						
							|  |  |  |         # Significant test_peg_generator speedups | 
					
						
							|  |  |  |         disable_optimization=True, | 
					
						
							|  |  |  |         library_dir=library_dir, | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def print_memstats() -> bool: | 
					
						
							| 
									
										
										
										
											2023-06-01 17:24:15 +02:00
										 |  |  |     MiB: Final = 2**20 | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     try: | 
					
						
							| 
									
										
										
										
											2023-08-29 20:14:08 +01:00
										 |  |  |         import psutil | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     except ImportError: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     print("Memory stats:") | 
					
						
							|  |  |  |     process = psutil.Process() | 
					
						
							|  |  |  |     meminfo = process.memory_info() | 
					
						
							|  |  |  |     res = {} | 
					
						
							|  |  |  |     res["rss"] = meminfo.rss / MiB | 
					
						
							|  |  |  |     res["vms"] = meminfo.vms / MiB | 
					
						
							|  |  |  |     if sys.platform == "win32": | 
					
						
							|  |  |  |         res["maxrss"] = meminfo.peak_wset / MiB | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process | 
					
						
							|  |  |  |         import resource  # Since it doesn't exist on Windows. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         rusage = resource.getrusage(resource.RUSAGE_SELF) | 
					
						
							|  |  |  |         if sys.platform == "darwin": | 
					
						
							|  |  |  |             factor = 1 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             factor = 1024  # Linux | 
					
						
							|  |  |  |         res["maxrss"] = rusage.ru_maxrss * factor / MiB | 
					
						
							|  |  |  |     for key, value in res.items(): | 
					
						
							|  |  |  |         print(f"  {key:12.12s}: {value:10.0f} MiB") | 
					
						
							|  |  |  |     return True |