lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
between runs given the same Grammar.txt input, regardless of the hash randomization setting.
commit 0c578d62fc
6 changed files with 116 additions and 16 deletions
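Background: before this change, Grammar.dump() pickled raw dicts, and on the interpreters this commit targets (pre-3.6 dict semantics) the iteration order of string-keyed dicts depends on the hash randomization seed, so two runs could write byte-different cache files from identical input. A minimal sketch of the effect, hypothetical and not part of the commit (on 3.7+, where dicts are insertion-ordered, the two outputs will match):

    import os
    import subprocess
    import sys

    # The child process pickles the same dict; only the hash seed differs.
    CHILD = """
    import pickle, sys
    d = {name: i for i, name in enumerate(["if", "else", "while", "for"])}
    sys.stdout.buffer.write(pickle.dumps(d, 2))
    """

    def pickled_bytes(seed):
        env = dict(os.environ, PYTHONHASHSEED=seed)
        return subprocess.check_output([sys.executable, "-c", CHILD], env=env)

    # On pre-3.6 interpreters this can print False across seeds; sorting the
    # keys before pickling, as this commit does, makes it True unconditionally.
    print(pickled_bytes("1") == pickled_bytes("2"))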
Lib/lib2to3/pgen2/driver.py
@@ -106,16 +106,19 @@ def parse_string(self, text, debug=False):
         return self.parse_tokens(tokens, debug)
 
 
+def _generate_pickle_name(gt):
+    head, tail = os.path.splitext(gt)
+    if tail == ".txt":
+        tail = ""
+    return head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+
+
 def load_grammar(gt="Grammar.txt", gp=None,
                  save=True, force=False, logger=None):
     """Load the grammar (maybe from a pickle)."""
     if logger is None:
         logger = logging.getLogger()
-    if gp is None:
-        head, tail = os.path.splitext(gt)
-        if tail == ".txt":
-            tail = ""
-        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
+    gp = _generate_pickle_name(gt) if gp is None else gp
     if force or not _newer(gp, gt):
         logger.info("Generating grammar tables from %s", gt)
         g = pgen.generate_grammar(gt)
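With the helper factored out, the cache file name embeds the full sys.version_info, so pickles written by different interpreter versions never collide. An illustrative session (the exact name depends on the running interpreter; shown here for a hypothetical 3.6.0a4 build):

    >>> from lib2to3.pgen2 import driver
    >>> driver._generate_pickle_name("Grammar.txt")   # the ".txt" suffix is dropped
    'Grammar3.6.0.alpha.4.pickle'
    >>> driver._generate_pickle_name("PatternGrammar.txt")
    'PatternGrammar3.6.0.alpha.4.pickle'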
@@ -124,7 +127,7 @@ def load_grammar(gt="Grammar.txt", gp=None,
             try:
                 g.dump(gp)
             except OSError as e:
-                logger.info("Writing failed:"+str(e))
+                logger.info("Writing failed: %s", e)
     else:
         g = grammar.Grammar()
         g.load(gp)
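The logging call also moves from eager string concatenation to logging's deferred %-style arguments, so the message is only formatted if a handler actually emits the record. A small standalone comparison:

    import logging

    logging.basicConfig(level=logging.WARNING)   # INFO records are dropped
    log = logging.getLogger("pgen2.driver")
    err = OSError("read-only file system")

    log.info("Writing failed:" + str(err))   # old style: string built regardless
    log.info("Writing failed: %s", err)      # new style: formatting deferred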
Lib/lib2to3/pgen2/grammar.py
@@ -13,6 +13,7 @@
 """
 
 # Python imports
+import collections
 import pickle
 
 # Local imports
@@ -85,9 +86,21 @@ def __init__(self):
         self.start = 256
 
     def dump(self, filename):
-        """Dump the grammar tables to a pickle file."""
+        """Dump the grammar tables to a pickle file.
+
+        dump() recursively changes all dict to OrderedDict, so the pickled file
+        is not exactly the same as what was passed in to dump(). load() uses the
+        pickled file to create the tables, but only changes OrderedDict to dict
+        at the top level; it does not recursively change OrderedDict to dict.
+        So, the loaded tables are different from the original tables that were
+        passed to load() in that some of the OrderedDict (from the pickled file)
+        are not changed back to dict. For parsing, this has no effect on
+        performance because OrderedDict uses dict's __getitem__ with nothing in
+        between.
+        """
         with open(filename, "wb") as f:
-            pickle.dump(self.__dict__, f, 2)
+            d = _make_deterministic(self.__dict__)
+            pickle.dump(d, f, 2)
 
     def load(self, filename):
         """Load the grammar tables from a pickle file."""
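The docstring's caveat can be seen directly: pickling the normalized tables and loading them back yields OrderedDict instances in place of the original nested dicts, which is harmless for lookups since OrderedDict is a dict subclass. A quick illustration using the _make_deterministic helper added in the next hunk (hypothetical table contents):

    import pickle
    from lib2to3.pgen2 import grammar

    tables = {"keywords": {"if": 1, "while": 2}, "start": 256}
    d = grammar._make_deterministic(tables)
    restored = pickle.loads(pickle.dumps(d, 2))
    # Nested mappings come back as OrderedDict; __getitem__ is unchanged.
    print(type(restored["keywords"]).__name__)   # OrderedDict
    print(restored["keywords"]["if"])            # 1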
@@ -124,6 +137,17 @@ def report(self):
         print("start", self.start)
 
 
+def _make_deterministic(top):
+    if isinstance(top, dict):
+        return collections.OrderedDict(
+            sorted(((k, _make_deterministic(v)) for k, v in top.items())))
+    if isinstance(top, list):
+        return [_make_deterministic(e) for e in top]
+    if isinstance(top, tuple):
+        return tuple(_make_deterministic(e) for e in top)
+    return top
+
+
 # Map from operator to number (since tokenize doesn't do this)
 
 opmap_raw = """
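_make_deterministic normalizes every mapping into a key-sorted OrderedDict, recursing through lists and tuples, so structurally equal tables always pickle to identical bytes no matter how their keys were inserted or hashed. A sketch:

    import pickle
    from lib2to3.pgen2 import grammar

    a = {"while": 2, "if": 1}
    b = {"if": 1, "while": 2}    # same mapping, different insertion order
    print(pickle.dumps(grammar._make_deterministic(a), 2) ==
          pickle.dumps(grammar._make_deterministic(b), 2))   # True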
Lib/lib2to3/pgen2/pgen.py
@@ -39,7 +39,7 @@ def make_grammar(self):
             states = []
             for state in dfa:
                 arcs = []
-                for label, next in state.arcs.items():
+                for label, next in sorted(state.arcs.items()):
                     arcs.append((self.make_label(c, label), dfa.index(next)))
                 if state.isfinal:
                     arcs.append((0, dfa.index(state)))
@@ -52,7 +52,7 @@ def make_grammar(self):
     def make_first(self, c, name):
         rawfirst = self.first[name]
         first = {}
-        for label in rawfirst:
+        for label in sorted(rawfirst):
             ilabel = self.make_label(c, label)
             ##assert ilabel not in first # XXX failed on <> ... !=
             first[ilabel] = 1
@@ -192,7 +192,7 @@ def addclosure(state, base):
             for label, next in nfastate.arcs:
                 if label is not None:
                     addclosure(next, arcs.setdefault(label, {}))
-        for label, nfaset in arcs.items():
+        for label, nfaset in sorted(arcs.items()):
             for st in states:
                 if st.nfaset == nfaset:
                     break
@@ -222,7 +222,7 @@ def dump_dfa(self, name, dfa):
         print("Dump of DFA for", name)
         for i, state in enumerate(dfa):
             print("  State", i, state.isfinal and "(final)" or "")
-            for label, next in state.arcs.items():
+            for label, next in sorted(state.arcs.items()):
                 print("    %s -> %d" % (label, dfa.index(next)))
 
     def simplify_dfa(self, dfa):
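Each pgen loop above swaps raw dict iteration for sorted(), fixing a single canonical order for table construction. The pattern in isolation, with a hypothetical arc table:

    arcs = {"expr": 3, "atom": 1, "term": 2}
    # .items() order varies with the dict implementation and hash seed;
    # sorted() pins it to lexicographic key order: atom, expr, term.
    for label, next_state in sorted(arcs.items()):
        print("%s -> %d" % (label, next_state))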
Lib/lib2to3/tests/support.py
@@ -9,13 +9,13 @@
 
 # Local imports
 from lib2to3 import pytree, refactor
-from lib2to3.pgen2 import driver
+from lib2to3.pgen2 import driver as pgen2_driver
 
 test_dir = os.path.dirname(__file__)
 proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
 grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
-grammar = driver.load_grammar(grammar_path)
-driver = driver.Driver(grammar, convert=pytree.convert)
+grammar = pgen2_driver.load_grammar(grammar_path)
+driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
 
 def parse_string(string):
     return driver.parse_string(reformat(string), debug=True)
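The module is imported under the alias pgen2_driver because support.py rebinds the bare name driver to a Driver instance; without the alias, any later use of the module would hit the shadowed name. An illustration of the collision the rename avoids (assuming a Grammar.txt next to the script):

    from lib2to3.pgen2 import driver

    grammar = driver.load_grammar("Grammar.txt")   # driver is still the module here
    driver = driver.Driver(grammar)                # rebinds the name to an instance...
    # driver.load_grammar(...)   # ...so this would now raise AttributeError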
Lib/lib2to3/tests/test_parser.py
@@ -15,11 +15,15 @@
 
 # Python imports
 import os
+import shutil
+import subprocess
+import sys
+import tempfile
 import unittest
 import warnings
-import subprocess
 
 # Local imports
+from lib2to3.pgen2 import driver as pgen2_driver
 from lib2to3.pgen2 import tokenize
 from ..pgen2.parse import ParseError
 from lib2to3.pygram import python_symbols as syms
@@ -34,6 +38,71 @@ def test_formfeed(self):
         self.assertEqual(t.children[1].children[0].type, syms.print_stmt)
 
 
+class TestPgen2Caching(support.TestCase):
+    def test_load_grammar_from_txt_file(self):
+        pgen2_driver.load_grammar(support.grammar_path, save=False, force=True)
+
+    def test_load_grammar_from_pickle(self):
+        # Make a copy of the grammar file in a temp directory we are
+        # guaranteed to be able to write to.
+        tmpdir = tempfile.mkdtemp()
+        try:
+            grammar_copy = os.path.join(
+                    tmpdir, os.path.basename(support.grammar_path))
+            shutil.copy(support.grammar_path, grammar_copy)
+            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+
+            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+            self.assertTrue(os.path.exists(pickle_name))
+
+            os.unlink(grammar_copy)  # Only the pickle remains...
+            pgen2_driver.load_grammar(grammar_copy, save=False, force=False)
+        finally:
+            shutil.rmtree(tmpdir)
+
+    @unittest.skipIf(sys.executable is None, 'sys.executable required')
+    def test_load_grammar_from_subprocess(self):
+        tmpdir = tempfile.mkdtemp()
+        tmpsubdir = os.path.join(tmpdir, 'subdir')
+        try:
+            os.mkdir(tmpsubdir)
+            grammar_base = os.path.basename(support.grammar_path)
+            grammar_copy = os.path.join(tmpdir, grammar_base)
+            grammar_sub_copy = os.path.join(tmpsubdir, grammar_base)
+            shutil.copy(support.grammar_path, grammar_copy)
+            shutil.copy(support.grammar_path, grammar_sub_copy)
+            pickle_name = pgen2_driver._generate_pickle_name(grammar_copy)
+            pickle_sub_name = pgen2_driver._generate_pickle_name(
+                    grammar_sub_copy)
+            self.assertNotEqual(pickle_name, pickle_sub_name)
+
+            # Generate a pickle file from this process.
+            pgen2_driver.load_grammar(grammar_copy, save=True, force=True)
+            self.assertTrue(os.path.exists(pickle_name))
+
+            # Generate a new pickle file in a subprocess with a most likely
+            # different hash randomization seed.
+            sub_env = dict(os.environ)
+            sub_env['PYTHONHASHSEED'] = 'random'
+            subprocess.check_call(
+                    [sys.executable, '-c', """
+from lib2to3.pgen2 import driver as pgen2_driver
+pgen2_driver.load_grammar(%r, save=True, force=True)
+                    """ % (grammar_sub_copy,)],
+                    env=sub_env)
+            self.assertTrue(os.path.exists(pickle_sub_name))
+
+            with open(pickle_name, 'rb') as pickle_f_1, \
+                    open(pickle_sub_name, 'rb') as pickle_f_2:
+                self.assertEqual(
+                    pickle_f_1.read(), pickle_f_2.read(),
+                    msg='Grammar caches generated using different hash seeds'
+                        ' were not identical.')
+        finally:
+            shutil.rmtree(tmpdir)
+
+
 class GrammarTest(support.TestCase):
     def validate(self, code):
         support.parse_string(code)
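The new caching tests can be exercised directly through unittest; one plausible invocation from a checkout with this patch applied (discovery details vary by Python version):

    import unittest
    from lib2to3.tests import test_parser

    suite = unittest.defaultTestLoader.loadTestsFromName(
        "TestPgen2Caching", module=test_parser)
    unittest.TextTestRunner(verbosity=2).run(suite)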
Misc/NEWS
@@ -99,6 +99,10 @@ Core and Builtins
 Library
 -------
 
+- lib2to3.pgen2.driver.load_grammar() now creates a stable cache file
+  between runs given the same Grammar.txt input regardless of the hash
+  randomization setting.
+
 - Issue #28005: Allow ImportErrors in encoding implementation to propagate.
 
 - Issue #27570: Avoid zero-length memcpy() etc calls with null source