Mirror of https://github.com/python/cpython.git
commit 0c578d62fc
Author: Gregory P. Smith [Google Inc.]

lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input regardless of the hash
randomization setting.

6 changed files with 116 additions and 16 deletions
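Background for the diffs below: pgen builds its tables by iterating over dicts and sets of strings, and with hash randomization enabled the iteration order differs per process, so the pickled cache bytes differed from run to run. A toy illustration of the effect (not part of the commit; plain Python):

import pickle

# Set iteration order for strings depends on the per-process hash seed,
# so serializing in iteration order can yield different bytes each run;
# imposing a sorted order makes the bytes stable.
labels = {"if", "else", "while", "for"}
print(pickle.dumps(list(labels), 2))    # bytes may vary between runs
print(pickle.dumps(sorted(labels), 2))  # identical bytes on every run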
Lib/lib2to3/pgen2/driver.py

@@ -106,16 +106,19 @@ def parse_string(self, text, debug=False):
         return self.parse_tokens(tokens, debug)
 
 
+def _generate_pickle_name(gt):
+    head, tail = os.path.splitext(gt)
+    if tail == ".txt":
+        tail = ""
+    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
 def load_grammar(gt="Grammar.txt", gp=None,
                  save=True, force=False, logger=None):
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
         logger = logging.getLogger()
-    if gp is None:
-        head, tail = os.path.splitext(gt)
-        if tail == ".txt":
-            tail = ""
-        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    gp = _generate_pickle_name(gt) if gp is None else gp
     if force or not _newer(gp, gt):
         logger.info("Generating grammar tables from %s", gt)
         g = pgen.generate_grammar(gt)
@@ -124,7 +127,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
             try:
                 g.dump(gp)
             except OSError as e:
-                logger.info("Writing failed:"+str(e))
+                logger.info("Writing failed: %s", e)
     else:
         g = grammar.Grammar()
         g.load(gp)
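The naming logic is unchanged; it was extracted into _generate_pickle_name() so the new tests can compute the expected cache path. Worked standalone (output assumes sys.version_info == (3, 6, 0, 'final', 0)):

import os
import sys

def _generate_pickle_name(gt):
    # Same helper as committed above: drop a ".txt" suffix, then append
    # the full interpreter version so different Pythons never share a cache.
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"

print(_generate_pickle_name("Grammar.txt"))
# -> 'Grammar3.6.0.final.0.pickle' on a hypothetical CPython 3.6.0 final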
Lib/lib2to3/pgen2/grammar.py

@@ -13,6 +13,7 @@
 """
 
 # Python imports
+import collections
 import pickle
 
 # Local imports
@@ -85,9 +86,21 @@ def __init__(self):
         self.start = 256
 
     def dump(self, filename):
-        """Dump the grammar tables to a pickle file."""
+        """Dump the grammar tables to a pickle file.
+
+        dump() recursively changes all dict to OrderedDict, so the pickled
+        file is not exactly the same as what was passed in to dump(). load()
+        uses the pickled file to create the tables, but only changes
+        OrderedDict to dict at the top level; it does not recursively change
+        OrderedDict to dict. So, the loaded tables are different from the
+        original tables that were passed to dump() in that some of the
+        OrderedDicts (from the pickled file) are not changed back to dicts.
+        For parsing, this has no effect on performance because OrderedDict
+        uses dict's __getitem__ with nothing in between.
+        """
         with open(filename, "wb") as f:
-            pickle.dump(self.__dict__, f, 2)
+            d = _make_deterministic(self.__dict__)
+            pickle.dump(d, f, 2)
 
     def load(self, filename):
         """Load the grammar tables from a pickle file."""
@@ -124,6 +137,17 @@ def report(self):
         print("start", self.start)
 
 
+def _make_deterministic(top):
+    if isinstance(top, dict):
+        return collections.OrderedDict(
+            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
+    if isinstance(top, list):
+        return [_make_deterministic(e) for e in top]
+    if isinstance(top, tuple):
+        return tuple(_make_deterministic(e) for e in top)
+    return top
+
+
 # Map from operator to number (since tokenize doesn't do this)
 
 opmap_raw = """
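What _make_deterministic() buys, as a quick round trip (a sketch reusing the function as committed above): two dicts holding the same mapping in different insertion orders pickle to byte-identical output once converted.

import collections
import pickle

def _make_deterministic(top):
    # As committed above: recursively sort dicts into OrderedDicts so the
    # serialized item order no longer depends on hashing.
    if isinstance(top, dict):
        return collections.OrderedDict(
            sorted((k, _make_deterministic(v)) for k, v in top.items()))
    if isinstance(top, list):
        return [_make_deterministic(e) for e in top]
    if isinstance(top, tuple):
        return tuple(_make_deterministic(e) for e in top)
    return top

a = {"while": 1, "if": 2, "else": 3}   # one insertion order
b = {"else": 3, "if": 2, "while": 1}   # same mapping, another order
assert pickle.dumps(_make_deterministic(a), 2) == \
       pickle.dumps(_make_deterministic(b), 2)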
Lib/lib2to3/pgen2/pgen.py

@@ -39,7 +39,7 @@ def make_grammar(self):
             states = []
             for state in dfa:
                 arcs = []
-                for label, next in state.arcs.items():
+                for label, next in sorted(state.arcs.items()):
                     arcs.append((self.make_label(c, label), dfa.index(next)))
                 if state.isfinal:
                     arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@ def make_grammar(self):
     def make_first(self, c, name):
         rawfirst = self.first[name]
         first = {}
-        for label in rawfirst:
+        for label in sorted(rawfirst):
             ilabel = self.make_label(c, label)
             ##assert ilabel not in first # XXX failed on <> ... !=
             first[ilabel] = 1
@@ -192,7 +192,7 @@ def addclosure(state, base):
                 for label, next in nfastate.arcs:
                     if label is not None:
                         addclosure(next, arcs.setdefault(label, {}))
-            for label, nfaset in arcs.items():
+            for label, nfaset in sorted(arcs.items()):
                 for st in states:
                     if st.nfaset == nfaset:
                         break
@@ -222,7 +222,7 @@ def dump_dfa(self, name, dfa):
         print("Dump of DFA for", name)
         for i, state in enumerate(dfa):
             print("  State", i, state.isfinal and "(final)" or "")
-            for label, next in state.arcs.items():
+            for label, next in sorted(state.arcs.items()):
                 print("    %s -> %d" % (label, dfa.index(next)))
 
     def simplify_dfa(self, dfa):
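All four pgen.py changes are one pattern: iterate mappings and sets in sorted order rather than hash order, so the generated label numbers and arc lists come out identical on every run. In isolation:

# Hypothetical arc table; .items() reflects hash/insertion order, while
# sorted() imposes the same canonical order in every process.
arcs = {"NAME": 1, "(": 2, "[": 3}
print(sorted(arcs.items()))  # always [('(', 2), ('NAME', 1), ('[', 3)]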
Lib/lib2to3/tests/support.py

@@ -9,13 +9,13 @@
 
 # Local imports
 from lib2to3 import pytree, refactor
-from lib2to3.pgen2 import driver
+from lib2to3.pgen2 import driver as pgen2_driver
 
 test_dir = os.path.dirname(__file__)
 proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
 grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
-grammar = driver.load_grammar(grammar_path)
-driver = driver.Driver(grammar, convert=pytree.convert)
+grammar = pgen2_driver.load_grammar(grammar_path)
+driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
 
 def parse_string(string):
     return driver.parse_string(reformat(string), debug=True)
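The import alias in support.py is not cosmetic: the module-level driver variable rebinds the name, after which the module itself would be unreachable as driver. A hypothetical minimal reproduction of that shadowing (assumes a Grammar.txt in the working directory):

from lib2to3.pgen2 import driver

grammar = driver.load_grammar("Grammar.txt")  # the module is still visible
driver = driver.Driver(grammar)               # rebinds driver to an instance
# A later driver.load_grammar(...) would raise AttributeError, because the
# Driver instance, not the module, now owns the name.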
Lib/lib2to3/tests/test_parser.py

@@ -15,11 +15,15 @@
 
 # Python imports
 import os
+import shutil
+import subprocess
+import sys
+import tempfile
 import unittest
 import warnings
-import subprocess
 
 # Local imports
+from lib2to3.pgen2 import driver as pgen2_driver
 from lib2to3.pgen2 import tokenize
 from ..pgen2.parse import ParseError
 from lib2to3.pygram import python_symbols as syms
@@ -34,6 +38,71 @@ def test_formfeed(self):
         self.assertEqual(t.children[1].children[0].type, syms.print_stmt)
 
 
+class TestPgen2Caching(support.TestCase):
+    def test_load_grammar_from_txt_file(self):
+        pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)
+
+    def test_load_grammar_from_pickle(self):
+        # Make a copy of the grammar file in a temp directory we are
+        # guaranteed to be able to write to.
+        tmpdir = tempfile.mkdtemp()
+        try:
+            grammar_copy = os.path.join(
+                    tmpdir, os.path.basename(support.grammar_path))
+            shutil.copy(support.grammar_path, grammar_copy)
+            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+
+            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+            self.assertTrue(os.path.exists(pickle_name))
+
+            os.unlink(grammar_copy)  # Only the pickle remains...
+            pgen2_driver.load_grammar(grammar_copy, save=False, force=False)
+        finally:
+            shutil.rmtree(tmpdir)
+
+    @unittest.skipIf(sys.executable is None, 'sys.executable required')
+    def test_load_grammar_from_subprocess(self):
+        tmpdir = tempfile.mkdtemp()
+        tmpsubdir = os.path.join(tmpdir, 'subdir')
+        try:
+            os.mkdir(tmpsubdir)
+            grammar_base = os.path.basename(support.grammar_path)
+            grammar_copy = os.path.join(tmpdir, grammar_base)
+            grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
+            shutil.copy(support.grammar_path, grammar_copy)
+            shutil.copy(support.grammar_path, grammar_sub_copy)
+            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+            pickle_sub_name = pgen2_driver._generate_pickle_name(
+                     grammar_sub_copy)
+            self.assertNotEqual(pickle_name, pickle_sub_name)
+
+            # Generate a pickle file from this process.
+            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+            self.assertTrue(os.path.exists(pickle_name))
+
+            # Generate a new pickle file in a subprocess with a most likely
+            # different hash randomization seed.
+            sub_env = dict(os.environ)
+            sub_env['PYTHONHASHSEED'] = 'random'
+            subprocess.check_call(
+                    [sys.executable, '-c', """
+from lib2to3.pgen2 import driver as pgen2_driver
+pgen2_driver.load_grammar(%r, save=True, force=True)
+                    """ % (grammar_sub_copy,)],
+                    env=sub_env)
+            self.assertTrue(os.path.exists(pickle_sub_name))
+
+            with open(pickle_name, 'rb') as pickle_f_1, \
+                    open(pickle_sub_name, 'rb') as pickle_f_2:
+                self.assertEqual(
+                    pickle_f_1.read(), pickle_f_2.read(),
+                    msg='Grammar caches generated using different hash seeds'
+                    ' were not identical.')
+        finally:
+            shutil.rmtree(tmpdir)
+
+
+
 class GrammarTest(support.TestCase):
     def validate(self, code):
         support.parse_string(code)
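The invariant the subprocess test asserts can also be checked by hand. A condensed, standalone sketch (assumes a writable copy of Grammar.txt in the current directory):

import os
import subprocess
import sys

from lib2to3.pgen2 import driver as pgen2_driver

def pickle_bytes(seed):
    # Rebuild the grammar cache in a child interpreter pinned to a given
    # hash seed, then return the resulting pickle bytes.
    env = dict(os.environ, PYTHONHASHSEED=seed)
    subprocess.check_call(
        [sys.executable, '-c',
         "from lib2to3.pgen2 import driver as d; "
         "d.load_grammar('Grammar.txt', save=True, force=True)"],
        env=env)
    with open(pgen2_driver._generate_pickle_name('Grammar.txt'), 'rb') as f:
        return f.read()

assert pickle_bytes('12345') == pickle_bytes('54321')  # stable across seeds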
Misc/NEWS

@@ -99,6 +99,10 @@ Core and Builtins
 Library
 -------
 
+- lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
+  between runs given the same Grammar.txt input regardless of the hash
+  randomization setting.
+
 - Issue #28005: Allow ImportErrors in encoding implementation to propagate.
 
 - Issue #27570: Avoid zero-length memcpy() etc calls with null source