mirror of
https://github.com/python/cpython.git
synced 2026-01-06 07:22:09 +00:00
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the source contents rather than volatile source metadata. See the PEP for details. While a fairly straightforward idea, quite a lot of code had to be modified due to the pervasiveness of pyc implementation details in the codebase. Changes in this commit include: - The core changes to importlib to understand how to read, validate, and regenerate hash-based pycs. - Support for generating hash-based pycs in py_compile and compileall. - Modifications to our siphash implementation to support passing a custom key. We then expose it to importlib through _imp. - Updates to all places in the interpreter, standard library, and tests that manually generate or parse pyc files to grok the new format. - Support in the interpreter command line code for long options like --check-hash-based-pycs. - Tests and documentation for all of the above.
This commit is contained in:
parent
28d8d14013
commit
42aa93b8ff
33 changed files with 3364 additions and 2505 deletions
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
#include "Python-ast.h"
|
||||
#undef Yield /* undefine macro conflicting with winbase.h */
|
||||
#include "internal/hash.h"
|
||||
#include "internal/import.h"
|
||||
#include "internal/pystate.h"
|
||||
#include "errcode.h"
|
||||
#include "marshal.h"
|
||||
|
|
@ -2184,6 +2186,34 @@ _imp_exec_builtin_impl(PyObject *module, PyObject *mod)
|
|||
return exec_builtin_or_dynamic(mod);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_imp.source_hash
|
||||
|
||||
key: long
|
||||
source: Py_buffer
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source)
|
||||
/*[clinic end generated code: output=edb292448cf399ea input=9aaad1e590089789]*/
|
||||
{
|
||||
uint64_t hash = _Py_KeyedHash((uint64_t)key, source->buf, source->len);
|
||||
#if !PY_LITTLE_ENDIAN
|
||||
// Force to little-endian. There really ought to be a succinct standard way
|
||||
// to do this.
|
||||
union {
|
||||
uint64_t x;
|
||||
unsigned char data[sizeof(uint64_t)];
|
||||
} pun;
|
||||
pun.x = hash;
|
||||
for (size_t i = 0; i < sizeof(pun.data); i++) {
|
||||
pun.data[sizeof(pun.data) - i - 1] = pun.data[i];
|
||||
}
|
||||
hash = pun.x;
|
||||
#endif
|
||||
return PyBytes_FromStringAndSize((const char *)&hash, sizeof(hash));
|
||||
}
|
||||
|
||||
|
||||
PyDoc_STRVAR(doc_imp,
|
||||
"(Extremely) low-level import machinery bits as used by importlib and imp.");
|
||||
|
|
@ -2203,6 +2233,7 @@ static PyMethodDef imp_methods[] = {
|
|||
_IMP_EXEC_DYNAMIC_METHODDEF
|
||||
_IMP_EXEC_BUILTIN_METHODDEF
|
||||
_IMP__FIX_CO_FILENAME_METHODDEF
|
||||
_IMP_SOURCE_HASH_METHODDEF
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
|
@ -2219,6 +2250,8 @@ static struct PyModuleDef impmodule = {
|
|||
NULL
|
||||
};
|
||||
|
||||
/* Mode string that PyInit_imp() stores in the _imp module dict under the key
   "check_hash_based_pycs".  Initialized to "default".
   NOTE(review): presumably overridden by the --check-hash-based-pycs command
   line option before module init -- confirm against the option parser. */
const char *_Py_CheckHashBasedPycsMode = "default";
|
||||
|
||||
PyMODINIT_FUNC
|
||||
PyInit_imp(void)
|
||||
{
|
||||
|
|
@ -2230,6 +2263,15 @@ PyInit_imp(void)
|
|||
d = PyModule_GetDict(m);
|
||||
if (d == NULL)
|
||||
goto failure;
|
||||
PyObject *pyc_mode = PyUnicode_FromString(_Py_CheckHashBasedPycsMode);
|
||||
if (pyc_mode == NULL) {
|
||||
goto failure;
|
||||
}
|
||||
if (PyDict_SetItemString(d, "check_hash_based_pycs", pyc_mode) < 0) {
|
||||
Py_DECREF(pyc_mode);
|
||||
goto failure;
|
||||
}
|
||||
Py_DECREF(pyc_mode);
|
||||
|
||||
return m;
|
||||
failure:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue