mirror of
https://github.com/python/cpython.git
synced 2025-12-31 04:23:37 +00:00
gh-117045: Add code object to function version cache (#117028)
Changes to the function version cache: - In addition to the function object, also store the code object, and allow the latter to be retrieved even if the function has been evicted. - Stop assigning new function versions after a critical attribute (e.g. `__code__`) has been modified; the version is permanently reset to zero in this case. - Changes to `__annotations__` are no longer considered critical. (This fixes gh-109998.) Changes to the Tier 2 optimization machinery: - If we cannot map a function version to a function, but it is still mapped to a code object, we continue projecting the trace. The operand of the `_PUSH_FRAME` and `_POP_FRAME` opcodes can be either NULL, a function object, or a code object with the lowest bit set. This allows us to trace through code that calls an ephemeral function, i.e., a function that may not be alive when we are constructing the executor, e.g. a generator expression or certain nested functions. We will lose globals removal inside such functions, but we can still do other peephole operations (and even possibly [call inlining](https://github.com/python/cpython/pull/116290), if we decide to do it), which only need the code object. As before, if we cannot retrieve the code object from the cache, we stop projecting.
This commit is contained in:
parent
c85d84166a
commit
570a82d46a
8 changed files with 208 additions and 95 deletions
|
|
@ -1710,6 +1710,7 @@ code_dealloc(PyCodeObject *co)
|
|||
}
|
||||
Py_SET_REFCNT(co, 0);
|
||||
|
||||
_PyFunction_ClearCodeByVersion(co->co_version);
|
||||
if (co->co_extra != NULL) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyCodeObjectExtra *co_extra = co->co_extra;
|
||||
|
|
|
|||
|
|
@ -218,43 +218,61 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
|
|||
}
|
||||
|
||||
/*
|
||||
Function versions
|
||||
-----------------
|
||||
(This is purely internal documentation. There are no public APIs here.)
|
||||
|
||||
Function versions are used to detect when a function object has been
|
||||
updated, invalidating inline cache data used by the `CALL` bytecode
|
||||
(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
|
||||
Function (and code) versions
|
||||
----------------------------
|
||||
|
||||
They are also used by the Tier 2 superblock creation code to find
|
||||
the function being called (and from there the code object).
|
||||
The Tier 1 specializer generates CALL variants that can be invalidated
|
||||
by changes to critical function attributes:
|
||||
|
||||
How does a function's `func_version` field get initialized?
|
||||
- __code__
|
||||
- __defaults__
|
||||
- __kwdefaults__
|
||||
- __closure__
|
||||
|
||||
- `PyFunction_New` and friends initialize it to 0.
|
||||
- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
|
||||
- It is reset to 0 when various attributes like `__code__` are set.
|
||||
- A new version is allocated by `_PyFunction_GetVersionForCurrentState`
|
||||
when the specializer needs a version and the version is 0.
|
||||
For this purpose function objects have a 32-bit func_version member
|
||||
that the specializer writes to the specialized instruction's inline
|
||||
cache and which is checked by a guard on the specialized instructions.
|
||||
|
||||
The latter allocates versions using a counter in the interpreter state,
|
||||
`interp->func_state.next_version`.
|
||||
When the counter wraps around to 0, no more versions are allocated.
|
||||
There is one other special case: functions with a non-standard
|
||||
`vectorcall` field are not given a version.
|
||||
The MAKE_FUNCTION bytecode sets func_version from the code object's
|
||||
co_version field. The latter is initialized from a counter in the
|
||||
interpreter state (interp->func_state.next_version) and never changes.
|
||||
When this counter overflows, it remains zero and the specializer loses
|
||||
the ability to specialize calls to new functions.
|
||||
|
||||
When the function version is 0, the `CALL` bytecode is not specialized.
|
||||
The func_version is reset to zero when any of the critical attributes
|
||||
is modified; after this point the specializer will no longer specialize
|
||||
calls to this function, and the guard will always fail.
|
||||
|
||||
Code object versions
|
||||
--------------------
|
||||
The function and code version cache
|
||||
-----------------------------------
|
||||
|
||||
So where to code objects get their `co_version`?
|
||||
They share the same counter, `interp->func_state.next_version`.
|
||||
The Tier 2 optimizer now has a problem, since it needs to find the
|
||||
function and code objects given only the version number from the inline
|
||||
cache. Our solution is to maintain a cache mapping version numbers to
|
||||
function and code objects. To limit the cache size we could hash
|
||||
the version number, but for now we simply use it modulo the table size.
|
||||
|
||||
There are some corner cases (e.g. generator expressions) where we will
|
||||
be unable to find the function object in the cache but we can still
|
||||
find the code object. For this reason the cache stores both the
|
||||
function object and the code object.
|
||||
|
||||
The cache doesn't contain strong references; cache entries are
|
||||
invalidated whenever the function or code object is deallocated.
|
||||
|
||||
Invariants
|
||||
----------
|
||||
|
||||
These should hold at any time except when one of the cache-mutating
|
||||
functions is running.
|
||||
|
||||
- For any slot s at index i:
|
||||
- s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i
|
||||
- s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i
|
||||
if s->func != NULL, then s->func->func_code == s->code
|
||||
|
||||
Code objects get a new `co_version` allocated from this counter upon
|
||||
creation. Since code objects are nominally immutable, `co_version` can
|
||||
not be invalidated. The only way it can be 0 is when 2**32 or more
|
||||
code objects have been created during the process's lifetime.
|
||||
(The counter isn't reset by `fork()`, extending the lifetime.)
|
||||
*/
|
||||
|
||||
void
|
||||
|
|
@ -262,28 +280,61 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version)
|
|||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
if (func->func_version != 0) {
|
||||
PyFunctionObject **slot =
|
||||
struct _func_version_cache_item *slot =
|
||||
interp->func_state.func_version_cache
|
||||
+ (func->func_version % FUNC_VERSION_CACHE_SIZE);
|
||||
if (*slot == func) {
|
||||
*slot = NULL;
|
||||
if (slot->func == func) {
|
||||
slot->func = NULL;
|
||||
// Leave slot->code alone, there may be use for it.
|
||||
}
|
||||
}
|
||||
func->func_version = version;
|
||||
if (version != 0) {
|
||||
interp->func_state.func_version_cache[
|
||||
version % FUNC_VERSION_CACHE_SIZE] = func;
|
||||
struct _func_version_cache_item *slot =
|
||||
interp->func_state.func_version_cache
|
||||
+ (version % FUNC_VERSION_CACHE_SIZE);
|
||||
slot->func = func;
|
||||
slot->code = func->func_code;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
_PyFunction_ClearCodeByVersion(uint32_t version)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
struct _func_version_cache_item *slot =
|
||||
interp->func_state.func_version_cache
|
||||
+ (version % FUNC_VERSION_CACHE_SIZE);
|
||||
if (slot->code) {
|
||||
assert(PyCode_Check(slot->code));
|
||||
PyCodeObject *code = (PyCodeObject *)slot->code;
|
||||
if (code->co_version == version) {
|
||||
slot->code = NULL;
|
||||
slot->func = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PyFunctionObject *
|
||||
_PyFunction_LookupByVersion(uint32_t version)
|
||||
_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
PyFunctionObject *func = interp->func_state.func_version_cache[
|
||||
version % FUNC_VERSION_CACHE_SIZE];
|
||||
if (func != NULL && func->func_version == version) {
|
||||
return func;
|
||||
struct _func_version_cache_item *slot =
|
||||
interp->func_state.func_version_cache
|
||||
+ (version % FUNC_VERSION_CACHE_SIZE);
|
||||
if (slot->code) {
|
||||
assert(PyCode_Check(slot->code));
|
||||
PyCodeObject *code = (PyCodeObject *)slot->code;
|
||||
if (code->co_version == version) {
|
||||
*p_code = slot->code;
|
||||
}
|
||||
}
|
||||
else {
|
||||
*p_code = NULL;
|
||||
}
|
||||
if (slot->func && slot->func->func_version == version) {
|
||||
assert(slot->func->func_code == slot->code);
|
||||
return slot->func;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -291,19 +342,7 @@ _PyFunction_LookupByVersion(uint32_t version)
|
|||
uint32_t
|
||||
_PyFunction_GetVersionForCurrentState(PyFunctionObject *func)
|
||||
{
|
||||
if (func->func_version != 0) {
|
||||
return func->func_version;
|
||||
}
|
||||
if (func->vectorcall != _PyFunction_Vectorcall) {
|
||||
return 0;
|
||||
}
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
if (interp->func_state.next_version == 0) {
|
||||
return 0;
|
||||
}
|
||||
uint32_t v = interp->func_state.next_version++;
|
||||
_PyFunction_SetVersion(func, v);
|
||||
return v;
|
||||
return func->func_version;
|
||||
}
|
||||
|
||||
PyObject *
|
||||
|
|
@ -507,7 +546,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations)
|
|||
"non-dict annotations");
|
||||
return -1;
|
||||
}
|
||||
_PyFunction_SetVersion((PyFunctionObject *)op, 0);
|
||||
Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -731,7 +769,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno
|
|||
"__annotations__ must be set to a dict object");
|
||||
return -1;
|
||||
}
|
||||
_PyFunction_SetVersion(op, 0);
|
||||
Py_XSETREF(op->func_annotations, Py_XNewRef(value));
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue