GH-136410: Faster side exits by using a cold exit stub (GH-136411)

This commit is contained in:
Mark Shannon 2025-08-01 16:26:07 +01:00 committed by GitHub
parent 718e0c89ba
commit e7b55f564d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 387 additions and 267 deletions

View file

@ -205,8 +205,8 @@ static int executor_clear(PyObject *executor);
static void unlink_executor(_PyExecutorObject *executor);
static void
free_executor(_PyExecutorObject *self)
void
_PyExecutor_Free(_PyExecutorObject *self)
{
#ifdef _Py_JIT
_PyJIT_Free(self);
@ -242,7 +242,7 @@ _Py_ClearExecutorDeletionList(PyInterpreterState *interp)
}
else {
*prev_to_next_ptr = exec->vm_data.links.next;
free_executor(exec);
_PyExecutor_Free(exec);
}
exec = *prev_to_next_ptr;
}
@ -1129,7 +1129,7 @@ sanity_check(_PyExecutorObject *executor)
}
bool ended = false;
uint32_t i = 0;
CHECK(executor->trace[0].opcode == _START_EXECUTOR);
CHECK(executor->trace[0].opcode == _START_EXECUTOR || executor->trace[0].opcode == _COLD_EXIT);
for (; i < executor->code_size; i++) {
const _PyUOpInstruction *inst = &executor->trace[i];
uint16_t opcode = inst->opcode;
@ -1182,9 +1182,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
}
/* Initialize exits */
_PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
for (int i = 0; i < exit_count; i++) {
executor->exits[i].executor = NULL;
executor->exits[i].index = i;
executor->exits[i].temperature = initial_temperature_backoff_counter();
executor->exits[i].executor = cold;
}
int next_exit = exit_count-1;
_PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@ -1462,6 +1464,46 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s
link_executor(executor);
}
_PyExecutorObject *
_PyExecutor_GetColdExecutor(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->cold_executor != NULL) {
return interp->cold_executor;
}
_PyExecutorObject *cold = allocate_executor(0, 1);
if (cold == NULL) {
Py_FatalError("Cannot allocate core JIT code");
}
((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT;
#ifdef _Py_JIT
cold->jit_code = NULL;
cold->jit_side_entry = NULL;
cold->jit_size = 0;
// This is initialized to true so we can prevent the executor
// from being immediately detected as cold and invalidated.
cold->vm_data.warm = true;
if (_PyJIT_Compile(cold, cold->trace, 1)) {
Py_DECREF(cold);
Py_FatalError("Cannot allocate core JIT code");
}
#endif
_Py_SetImmortal((PyObject *)cold);
interp->cold_executor = cold;
return cold;
}
void
_PyExecutor_ClearExit(_PyExitData *exit)
{
if (exit == NULL) {
return;
}
_PyExecutorObject *old = exit->executor;
exit->executor = _PyExecutor_GetColdExecutor();
Py_DECREF(old);
}
/* Detaches the executor from the code object (if any) that
* holds a reference to it */
void
@ -1492,14 +1534,18 @@ executor_clear(PyObject *op)
assert(executor->vm_data.valid == 1);
unlink_executor(executor);
executor->vm_data.valid = 0;
/* It is possible for an executor to form a reference
* cycle with itself, so decref'ing a side exit could
* free the executor unless we hold a strong reference to it
*/
_PyExecutorObject *cold = _PyExecutor_GetColdExecutor();
Py_INCREF(executor);
for (uint32_t i = 0; i < executor->exit_count; i++) {
executor->exits[i].temperature = initial_unreachable_backoff_counter();
Py_CLEAR(executor->exits[i].executor);
_PyExecutorObject *e = executor->exits[i].executor;
executor->exits[i].executor = cold;
Py_DECREF(e);
}
_Py_ExecutorDetach(executor);
Py_DECREF(executor);
@ -1741,4 +1787,11 @@ _PyDumpExecutors(FILE *out)
return -1;
}
void
_PyExecutor_Free(struct _PyExecutorObject *self)
{
/* This should never be called */
Py_UNREACHABLE();
}
#endif /* _Py_TIER2 */