lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input, regardless of the hash
randomization setting.
commit 0c578d62fc
6 changed files with 116 additions and 16 deletions
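Why hash randomization mattered here: pickle serializes a dict in whatever
iteration order the dict happens to have, and on the interpreters this commit
targeted that order could vary with PYTHONHASHSEED, so pickling the same
grammar tables twice could yield different bytes. A minimal sketch of the
failure mode and the fix (the table names below are illustrative, not taken
from the commit):

import collections
import pickle

tables = {"if_stmt": 297, "while_stmt": 298, "for_stmt": 299}

# Byte output depends on whatever iteration order this run happens to use,
# which could differ between runs under hash randomization:
unstable = pickle.dumps(tables, 2)

# Sorting the keys first pins one canonical order, so the output is
# byte-identical across runs regardless of PYTHONHASHSEED:
stable = pickle.dumps(collections.OrderedDict(sorted(tables.items())), 2)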
Lib/lib2to3/pgen2/driver.py

@@ -106,16 +106,19 @@ def parse_string(self, text, debug=False):
         return self.parse_tokens(tokens, debug)


+def _generate_pickle_name(gt):
+    head, tail = os.path.splitext(gt)
+    if tail == ".txt":
+        tail = ""
+    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
 def load_grammar(gt="Grammar.txt", gp=None,
                  save=True, force=False, logger=None):
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
         logger = logging.getLogger()
-    if gp is None:
-        head, tail = os.path.splitext(gt)
-        if tail == ".txt":
-            tail = ""
-        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    gp = _generate_pickle_name(gt) if gp is None else gp
     if force or not _newer(gp, gt):
         logger.info("Generating grammar tables from %s", gt)
         g = pgen.generate_grammar(gt)
@@ -124,7 +127,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
             try:
                 g.dump(gp)
             except OSError as e:
-                logger.info("Writing failed:"+str(e))
+                logger.info("Writing failed: %s", e)
     else:
         g = grammar.Grammar()
         g.load(gp)
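Factoring the name computation into _generate_pickle_name() also gives the
new tests one place to compute the cache path. As a rough illustration
(assuming a hypothetical CPython 3.6.0 final interpreter; the helper is
copied from the diff above), the derived name embeds the full
sys.version_info, so caches from different Python versions never collide:

import os
import sys

def _generate_pickle_name(gt):  # copied from the diff above
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"

# With sys.version_info == (3, 6, 0, 'final', 0) this prints
# "Grammar3.6.0.final.0.pickle":
print(_generate_pickle_name("Grammar.txt"))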
Lib/lib2to3/pgen2/grammar.py

@@ -13,6 +13,7 @@
 """

 # Python imports
+import collections
 import pickle

 # Local imports
@@ -85,9 +86,21 @@ def __init__(self):
         self.start = 256

     def dump(self, filename):
-        """Dump the grammar tables to a pickle file."""
+        """Dump the grammar tables to a pickle file.
+
+        dump() recursively changes all dict to OrderedDict, so the pickled file
+        is not exactly the same as what was passed in to dump(). load() uses the
+        pickled file to create the tables, but only changes OrderedDict to dict
+        at the top level; it does not recursively change OrderedDict to dict.
+        So, the loaded tables are different from the original tables that were
+        passed to dump() in that some of the OrderedDict (from the pickled file)
+        are not changed back to dict. For parsing, this has no effect on
+        performance because OrderedDict uses dict's __getitem__ with nothing in
+        between.
+        """
         with open(filename, "wb") as f:
-            pickle.dump(self.__dict__, f, 2)
+            d = _make_deterministic(self.__dict__)
+            pickle.dump(d, f, 2)

     def load(self, filename):
         """Load the grammar tables from a pickle file."""
@@ -124,6 +137,17 @@ def report(self):
         print("start", self.start)


+def _make_deterministic(top):
+    if isinstance(top, dict):
+        return collections.OrderedDict(
+            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
+    if isinstance(top, list):
+        return [_make_deterministic(e) for e in top]
+    if isinstance(top, tuple):
+        return tuple(_make_deterministic(e) for e in top)
+    return top
+
+
 # Map from operator to number (since tokenize doesn't do this)

 opmap_raw = """
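To see what dump() now writes, here is _make_deterministic() (copied from
the diff above) applied to a small nested structure; the toy values are
made up, but the real grammar tables nest dicts inside lists of tuples in
the same way:

import collections

def _make_deterministic(top):  # copied from the diff above
    if isinstance(top, dict):
        return collections.OrderedDict(
            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
    if isinstance(top, list):
        return [_make_deterministic(e) for e in top]
    if isinstance(top, tuple):
        return tuple(_make_deterministic(e) for e in top)
    return top

tables = {"keywords": {"if": 1, "else": 2}, "dfas": [({"NAME": 0}, 1)]}
print(_make_deterministic(tables))
# OrderedDict([('dfas', [(OrderedDict([('NAME', 0)]), 1)]),
#              ('keywords', OrderedDict([('else', 2), ('if', 1)]))])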
Lib/lib2to3/pgen2/pgen.py

@@ -39,7 +39,7 @@ def make_grammar(self):
             states = []
             for state in dfa:
                 arcs = []
-                for label, next in state.arcs.items():
+                for label, next in sorted(state.arcs.items()):
                     arcs.append((self.make_label(c, label), dfa.index(next)))
                 if state.isfinal:
                     arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@ def make_grammar(self):
     def make_first(self, c, name):
         rawfirst = self.first[name]
         first = {}
-        for label in rawfirst:
+        for label in sorted(rawfirst):
             ilabel = self.make_label(c, label)
             ##assert ilabel not in first # XXX failed on <> ... !=
             first[ilabel] = 1
@@ -192,7 +192,7 @@ def addclosure(state, base):
                 for label, next in nfastate.arcs:
                     if label is not None:
                         addclosure(next, arcs.setdefault(label, {}))
-            for label, nfaset in arcs.items():
+            for label, nfaset in sorted(arcs.items()):
                 for st in states:
                     if st.nfaset == nfaset:
                         break
@@ -222,7 +222,7 @@ def dump_dfa(self, name, dfa):
         print("Dump of DFA for", name)
         for i, state in enumerate(dfa):
             print("  State", i, state.isfinal and "(final)" or "")
             for label, next in sorted(state.arcs.items()):
                 print("    %s -> %d" % (label, dfa.index(next)))

     def simplify_dfa(self, dfa):
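The sorted() calls matter for the same reason as the pickle change: pgen
numbers labels and DFA states in the order it first encounters them, so
iterating raw dicts baked the hash order into the tables themselves, not
just into the pickle bytes. A tiny sketch with made-up labels:

# sorted() yields one canonical order no matter what order the dict was
# built in or hashed into:
arcs = {"NAME": 1, "(": 2, "[": 3}
for label, next_state in sorted(arcs.items()):
    print(label, "->", next_state)   # always: "(", then "NAME", then "["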
Lib/lib2to3/tests/support.py

@@ -9,13 +9,13 @@

 # Local imports
 from lib2to3 import pytree, refactor
-from lib2to3.pgen2 import driver
+from lib2to3.pgen2 import driver as pgen2_driver

 test_dir = os.path.dirname(__file__)
 proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
 grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
-grammar = driver.load_grammar(grammar_path)
-driver = driver.Driver(grammar, convert=pytree.convert)
+grammar = pgen2_driver.load_grammar(grammar_path)
+driver = pgen2_driver.Driver(grammar, convert=pytree.convert)

 def parse_string(string):
     return driver.parse_string(reformat(string), debug=True)
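The import rename is needed because two lines later the module binds a
Driver instance to the name driver, shadowing the module, while the new
tests still want to call into the module. A sketch of the hazard (the
assumed file layout matches the lib2to3 package, which ships Grammar.txt
next to its __init__.py):

import os
import lib2to3
from lib2to3.pgen2 import driver as pgen2_driver

grammar_path = os.path.join(os.path.dirname(lib2to3.__file__), "Grammar.txt")
grammar = pgen2_driver.load_grammar(grammar_path, save=False)
driver = pgen2_driver.Driver(grammar)  # rebinding `driver` is harmless now
# With a plain `from lib2to3.pgen2 import driver`, this assignment would
# shadow the module, and a later `driver.load_grammar(...)` would be an
# attribute lookup on the Driver instance and raise AttributeError.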
Lib/lib2to3/tests/test_parser.py

@@ -15,11 +15,15 @@

 # Python imports
 import os
+import shutil
+import subprocess
+import sys
+import tempfile
 import unittest
 import warnings
-import subprocess

 # Local imports
+from lib2to3.pgen2 import driver as pgen2_driver
 from lib2to3.pgen2 import tokenize
 from ..pgen2.parse import ParseError
 from lib2to3.pygram import python_symbols as syms
@@ -34,6 +38,71 @@ def test_formfeed(self):
         self.assertEqual(t.children[1].children[0].type, syms.print_stmt)


+class TestPgen2Caching(support.TestCase):
+    def test_load_grammar_from_txt_file(self):
+        pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)
+
+    def test_load_grammar_from_pickle(self):
+        # Make a copy of the grammar file in a temp directory we are
+        # guaranteed to be able to write to.
+        tmpdir = tempfile.mkdtemp()
+        try:
+            grammar_copy = os.path.join(
+                    tmpdir, os.path.basename(support.grammar_path))
+            shutil.copy(support.grammar_path, grammar_copy)
+            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+
+            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+            self.assertTrue(os.path.exists(pickle_name))
+
+            os.unlink(grammar_copy)  # Only the pickle remains...
+            pgen2_driver.load_grammar(grammar_copy, save=False, force=False)
+        finally:
+            shutil.rmtree(tmpdir)
+
+    @unittest.skipIf(sys.executable is None, 'sys.executable required')
+    def test_load_grammar_from_subprocess(self):
+        tmpdir = tempfile.mkdtemp()
+        tmpsubdir = os.path.join(tmpdir, 'subdir')
+        try:
+            os.mkdir(tmpsubdir)
+            grammar_base = os.path.basename(support.grammar_path)
+            grammar_copy = os.path.join(tmpdir, grammar_base)
+            grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
+            shutil.copy(support.grammar_path, grammar_copy)
+            shutil.copy(support.grammar_path, grammar_sub_copy)
+            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+            pickle_sub_name = pgen2_driver._generate_pickle_name(
+                     grammar_sub_copy)
+            self.assertNotEqual(pickle_name, pickle_sub_name)
+
+            # Generate a pickle file from this process.
+            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+            self.assertTrue(os.path.exists(pickle_name))
+
+            # Generate a new pickle file in a subprocess with a most likely
+            # different hash randomization seed.
+            sub_env = dict(os.environ)
+            sub_env['PYTHONHASHSEED'] = 'random'
+            subprocess.check_call(
+                    [sys.executable, '-c', """
+from lib2to3.pgen2 import driver as pgen2_driver
+pgen2_driver.load_grammar(%r, save=True, force=True)
+                    """ % (grammar_sub_copy,)],
+                    env=sub_env)
+            self.assertTrue(os.path.exists(pickle_sub_name))
+
+            with open(pickle_name, 'rb') as pickle_f_1, \
+                    open(pickle_sub_name, 'rb') as pickle_f_2:
+                self.assertEqual(
+                    pickle_f_1.read(), pickle_f_2.read(),
+                    msg='Grammar caches generated using different hash seeds'
+                    ' were not identical.')
+        finally:
+            shutil.rmtree(tmpdir)
+
+
+
 class GrammarTest(support.TestCase):
     def validate(self, code):
         support.parse_string(code)
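The invariant the subprocess test enforces can also be checked by hand; a
sketch (paths, seeds, and the temp-directory layout here are illustrative,
not taken from the test):

import filecmp
import os
import shutil
import subprocess
import sys
import tempfile

import lib2to3
from lib2to3.pgen2 import driver as pgen2_driver

src = os.path.join(os.path.dirname(lib2to3.__file__), "Grammar.txt")
tmp = tempfile.mkdtemp()
try:
    pickles = []
    for seed in ("1", "2"):
        gt = os.path.join(tmp, "seed%s-Grammar.txt" % seed)
        shutil.copy(src, gt)
        env = dict(os.environ, PYTHONHASHSEED=seed)
        prog = ("from lib2to3.pgen2 import driver as d; "
                "d.load_grammar(%r, save=True, force=True)" % gt)
        subprocess.check_call([sys.executable, "-c", prog], env=env)
        pickles.append(pgen2_driver._generate_pickle_name(gt))
    # The caches built under different hash seeds must be byte-identical:
    assert filecmp.cmp(*pickles, shallow=False), "caches differ"
finally:
    shutil.rmtree(tmp)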
|  |  | |||
|  | @ -99,6 +99,10 @@ Core and Builtins | |||
| Library | ||||
| ------- | ||||
| 
 | ||||
| - lib2to3.pgen3.driver.load_grammar() now creates a stable cache file | ||||
|   between runs given the same Grammar.txt input regardless of the hash | ||||
|   randomization setting. | ||||
| 
 | ||||
| - Issue #28005: Allow ImportErrors in encoding implementation to propagate. | ||||
| 
 | ||||
| - Issue #27570: Avoid zero-length memcpy() etc calls with null source | ||||
|  |  | |||
Gregory P. Smith [Google Inc.]