mirror of
https://github.com/python/cpython.git
synced 2025-11-09 10:01:42 +00:00
GH-137959: Replace shim code in jitted code with a single trampoline function. (GH-137961)
This commit is contained in:
parent
c056a089d8
commit
a8d9d94784
17 changed files with 166 additions and 104 deletions
|
|
@ -2971,7 +2971,7 @@ dummy_func(
|
|||
assert(tstate->current_executor == NULL);
|
||||
assert(executor != tstate->interp->cold_executor);
|
||||
tstate->jit_exit = NULL;
|
||||
GOTO_TIER_TWO(executor);
|
||||
TIER1_TO_TIER2(executor);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
@ -3037,7 +3037,7 @@ dummy_func(
|
|||
}
|
||||
assert(executor != tstate->interp->cold_executor);
|
||||
tstate->jit_exit = NULL;
|
||||
GOTO_TIER_TWO(executor);
|
||||
TIER1_TO_TIER2(executor);
|
||||
#else
|
||||
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
|
||||
#endif /* _Py_TIER2 */
|
||||
|
|
@ -5257,7 +5257,7 @@ dummy_func(
|
|||
}
|
||||
#endif
|
||||
tstate->jit_exit = exit;
|
||||
GOTO_TIER_TWO(exit->executor);
|
||||
TIER2_TO_TIER2(exit->executor);
|
||||
}
|
||||
|
||||
tier2 op(_CHECK_VALIDITY, (--)) {
|
||||
|
|
@ -5353,7 +5353,7 @@ dummy_func(
|
|||
|
||||
tier2 op(_START_EXECUTOR, (executor/4 --)) {
|
||||
#ifndef _Py_JIT
|
||||
current_executor = (_PyExecutorObject*)executor;
|
||||
assert(current_executor == (_PyExecutorObject*)executor);
|
||||
#endif
|
||||
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
|
||||
tstate->current_executor = (PyObject *)executor;
|
||||
|
|
@ -5434,7 +5434,7 @@ dummy_func(
|
|||
}
|
||||
assert(tstate->jit_exit == exit);
|
||||
exit->executor = executor;
|
||||
GOTO_TIER_TWO(exit->executor);
|
||||
TIER2_TO_TIER2(exit->executor);
|
||||
}
|
||||
|
||||
label(pop_2_error) {
|
||||
|
|
|
|||
|
|
@ -275,7 +275,8 @@ maybe_lltrace_resume_frame(_PyInterpreterFrame *frame, PyObject *globals)
|
|||
}
|
||||
int r = PyDict_Contains(globals, &_Py_ID(__lltrace__));
|
||||
if (r < 0) {
|
||||
return -1;
|
||||
PyErr_Clear();
|
||||
return 0;
|
||||
}
|
||||
int lltrace = r * 5; // Levels 1-4 only trace uops
|
||||
if (!lltrace) {
|
||||
|
|
@ -1109,11 +1110,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
|||
#endif
|
||||
}
|
||||
|
||||
#if defined(_Py_TIER2) && !defined(_Py_JIT)
|
||||
/* Tier 2 interpreter state */
|
||||
_PyExecutorObject *current_executor = NULL;
|
||||
const _PyUOpInstruction *next_uop = NULL;
|
||||
#endif
|
||||
#if Py_TAIL_CALL_INTERP
|
||||
# if Py_STATS
|
||||
return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, 0, lastopcode);
|
||||
|
|
@ -1126,14 +1122,41 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
|||
#endif
|
||||
|
||||
|
||||
early_exit:
|
||||
assert(_PyErr_Occurred(tstate));
|
||||
_Py_LeaveRecursiveCallPy(tstate);
|
||||
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
|
||||
// GH-99729: We need to unlink the frame *before* clearing it:
|
||||
_PyInterpreterFrame *dying = frame;
|
||||
frame = tstate->current_frame = dying->previous;
|
||||
_PyEval_FrameClearAndPop(tstate, dying);
|
||||
frame->return_offset = 0;
|
||||
assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
|
||||
/* Restore previous frame and exit */
|
||||
tstate->current_frame = frame->previous;
|
||||
return NULL;
|
||||
}
|
||||
#ifdef _Py_TIER2
|
||||
|
||||
// Tier 2 is also here!
|
||||
enter_tier_two:
|
||||
|
||||
#ifdef _Py_JIT
|
||||
assert(0);
|
||||
_PyJitEntryFuncPtr _Py_jit_entry = _Py_LazyJitTrampoline;
|
||||
#else
|
||||
_PyJitEntryFuncPtr _Py_jit_entry = _PyTier2Interpreter;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_Py_TIER2) && !defined(_Py_JIT)
|
||||
|
||||
_Py_CODEUNIT *
|
||||
_PyTier2Interpreter(
|
||||
_PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
|
||||
_PyStackRef *stack_pointer, PyThreadState *tstate
|
||||
) {
|
||||
const _PyUOpInstruction *next_uop;
|
||||
int oparg;
|
||||
tier2_start:
|
||||
|
||||
next_uop = current_executor->trace;
|
||||
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
|
||||
|
||||
#undef LOAD_IP
|
||||
#define LOAD_IP(UNUSED) (void)0
|
||||
|
|
@ -1151,7 +1174,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
|||
#undef ENABLE_SPECIALIZATION_FT
|
||||
#define ENABLE_SPECIALIZATION_FT 0
|
||||
|
||||
; // dummy statement after a label, before a declaration
|
||||
uint16_t uopcode;
|
||||
#ifdef Py_STATS
|
||||
int lastuop = 0;
|
||||
|
|
@ -1225,24 +1247,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
|||
next_uop = current_executor->trace + target;
|
||||
goto tier2_dispatch;
|
||||
|
||||
#endif // _Py_JIT
|
||||
|
||||
}
|
||||
#endif // _Py_TIER2
|
||||
|
||||
early_exit:
|
||||
assert(_PyErr_Occurred(tstate));
|
||||
_Py_LeaveRecursiveCallPy(tstate);
|
||||
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
|
||||
// GH-99729: We need to unlink the frame *before* clearing it:
|
||||
_PyInterpreterFrame *dying = frame;
|
||||
frame = tstate->current_frame = dying->previous;
|
||||
_PyEval_FrameClearAndPop(tstate, dying);
|
||||
frame->return_offset = 0;
|
||||
assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
|
||||
/* Restore previous frame and exit */
|
||||
tstate->current_frame = frame->previous;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef DO_NOT_OPTIMIZE_INTERP_LOOP
|
||||
# pragma optimize("", on)
|
||||
|
|
|
|||
|
|
@ -133,9 +133,6 @@ do { \
|
|||
_PyFrame_SetStackPointer(frame, stack_pointer); \
|
||||
int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
||||
if (lltrace < 0) { \
|
||||
JUMP_TO_LABEL(exit_unwind); \
|
||||
} \
|
||||
frame->lltrace = lltrace; \
|
||||
} while (0)
|
||||
#else
|
||||
|
|
@ -354,16 +351,10 @@ _PyFrame_SetStackPointer(frame, stack_pointer)
|
|||
|
||||
/* Tier-switching macros. */
|
||||
|
||||
#ifdef _Py_JIT
|
||||
#define GOTO_TIER_TWO(EXECUTOR) \
|
||||
#define TIER1_TO_TIER2(EXECUTOR) \
|
||||
do { \
|
||||
OPT_STAT_INC(traces_executed); \
|
||||
_PyExecutorObject *_executor = (EXECUTOR); \
|
||||
jit_func jitted = _executor->jit_code; \
|
||||
/* Keep the shim frame alive via the executor: */ \
|
||||
Py_INCREF(_executor); \
|
||||
next_instr = jitted(frame, stack_pointer, tstate); \
|
||||
Py_DECREF(_executor); \
|
||||
next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
|
||||
frame = tstate->current_frame; \
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
||||
if (next_instr == NULL) { \
|
||||
|
|
@ -372,31 +363,21 @@ do { \
|
|||
} \
|
||||
DISPATCH(); \
|
||||
} while (0)
|
||||
#else
|
||||
#define GOTO_TIER_TWO(EXECUTOR) \
|
||||
do { \
|
||||
OPT_STAT_INC(traces_executed); \
|
||||
_PyExecutorObject *_executor = (EXECUTOR); \
|
||||
next_uop = _executor->trace; \
|
||||
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \
|
||||
goto enter_tier_two; \
|
||||
|
||||
#define TIER2_TO_TIER2(EXECUTOR) \
|
||||
do { \
|
||||
OPT_STAT_INC(traces_executed); \
|
||||
current_executor = (EXECUTOR); \
|
||||
goto tier2_start; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#define GOTO_TIER_ONE(TARGET) \
|
||||
do \
|
||||
{ \
|
||||
tstate->current_executor = NULL; \
|
||||
next_instr = (TARGET); \
|
||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
|
||||
_PyFrame_SetStackPointer(frame, stack_pointer); \
|
||||
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
||||
if (next_instr == NULL) \
|
||||
{ \
|
||||
next_instr = frame->instr_ptr; \
|
||||
goto error; \
|
||||
} \
|
||||
DISPATCH(); \
|
||||
return TARGET; \
|
||||
} while (0)
|
||||
|
||||
#define CURRENT_OPARG() (next_uop[-1].oparg)
|
||||
|
|
|
|||
6
Python/executor_cases.c.h
generated
6
Python/executor_cases.c.h
generated
|
|
@ -7122,7 +7122,7 @@
|
|||
}
|
||||
#endif
|
||||
tstate->jit_exit = exit;
|
||||
GOTO_TIER_TWO(exit->executor);
|
||||
TIER2_TO_TIER2(exit->executor);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -7400,7 +7400,7 @@
|
|||
case _START_EXECUTOR: {
|
||||
PyObject *executor = (PyObject *)CURRENT_OPERAND0();
|
||||
#ifndef _Py_JIT
|
||||
current_executor = (_PyExecutorObject*)executor;
|
||||
assert(current_executor == (_PyExecutorObject*)executor);
|
||||
#endif
|
||||
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
|
||||
tstate->current_executor = (PyObject *)executor;
|
||||
|
|
@ -7503,7 +7503,7 @@
|
|||
}
|
||||
assert(tstate->jit_exit == exit);
|
||||
exit->executor = executor;
|
||||
GOTO_TIER_TWO(exit->executor);
|
||||
TIER2_TO_TIER2(exit->executor);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
4
Python/generated_cases.c.h
generated
4
Python/generated_cases.c.h
generated
|
|
@ -5493,7 +5493,7 @@
|
|||
}
|
||||
assert(executor != tstate->interp->cold_executor);
|
||||
tstate->jit_exit = NULL;
|
||||
GOTO_TIER_TWO(executor);
|
||||
TIER1_TO_TIER2(executor);
|
||||
#else
|
||||
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
|
||||
#endif /* _Py_TIER2 */
|
||||
|
|
@ -7667,7 +7667,7 @@
|
|||
assert(tstate->current_executor == NULL);
|
||||
assert(executor != tstate->interp->cold_executor);
|
||||
tstate->jit_exit = NULL;
|
||||
GOTO_TIER_TWO(executor);
|
||||
TIER1_TO_TIER2(executor);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
|
|||
78
Python/jit.c
78
Python/jit.c
|
|
@ -494,10 +494,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
|||
size_t code_size = 0;
|
||||
size_t data_size = 0;
|
||||
jit_state state = {0};
|
||||
group = &shim;
|
||||
code_size += group->code_size;
|
||||
data_size += group->data_size;
|
||||
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
const _PyUOpInstruction *instruction = &trace[i];
|
||||
group = &stencil_groups[instruction->opcode];
|
||||
|
|
@ -539,13 +535,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
|||
unsigned char *code = memory;
|
||||
state.trampolines.mem = memory + code_size;
|
||||
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
|
||||
// Compile the shim, which handles converting between the native
|
||||
// calling convention and the calling convention used by jitted code
|
||||
// (which may be different for efficiency reasons).
|
||||
group = &shim;
|
||||
group->emit(code, data, executor, NULL, &state);
|
||||
code += group->code_size;
|
||||
data += group->data_size;
|
||||
assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT);
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
const _PyUOpInstruction *instruction = &trace[i];
|
||||
|
|
@ -566,11 +555,75 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
|||
return -1;
|
||||
}
|
||||
executor->jit_code = memory;
|
||||
executor->jit_side_entry = memory + shim.code_size;
|
||||
executor->jit_size = total_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* One-off compilation of the jit entry trampoline
|
||||
* We compile this once only, as it is effectively a normal
|
||||
* function, but we need to use the JIT because it needs
|
||||
* to understand the jit-specific calling convention.
|
||||
*/
|
||||
static _PyJitEntryFuncPtr
|
||||
compile_trampoline(void)
|
||||
{
|
||||
_PyExecutorObject dummy;
|
||||
const StencilGroup *group;
|
||||
size_t code_size = 0;
|
||||
size_t data_size = 0;
|
||||
jit_state state = {0};
|
||||
group = &trampoline;
|
||||
code_size += group->code_size;
|
||||
data_size += group->data_size;
|
||||
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
|
||||
// Round up to the nearest page:
|
||||
size_t page_size = get_page_size();
|
||||
assert((page_size & (page_size - 1)) == 0);
|
||||
size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
|
||||
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1));
|
||||
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding;
|
||||
unsigned char *memory = jit_alloc(total_size);
|
||||
if (memory == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
unsigned char *code = memory;
|
||||
state.trampolines.mem = memory + code_size;
|
||||
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
|
||||
// Compile the trampoline, which handles converting between the native
|
||||
// calling convention and the calling convention used by jitted code
|
||||
// (which may be different for efficiency reasons).
|
||||
group = &trampoline;
|
||||
group->emit(code, data, &dummy, NULL, &state);
|
||||
code += group->code_size;
|
||||
data += group->data_size;
|
||||
assert(code == memory + code_size);
|
||||
assert(data == memory + code_size + state.trampolines.size + code_padding + data_size);
|
||||
if (mark_executable(memory, total_size)) {
|
||||
jit_free(memory, total_size);
|
||||
return NULL;
|
||||
}
|
||||
return (_PyJitEntryFuncPtr)memory;
|
||||
}
|
||||
|
||||
static PyMutex lazy_jit_mutex = { 0 };
|
||||
|
||||
_Py_CODEUNIT *
|
||||
_Py_LazyJitTrampoline(
|
||||
_PyExecutorObject *executor, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
|
||||
) {
|
||||
PyMutex_Lock(&lazy_jit_mutex);
|
||||
if (_Py_jit_entry == _Py_LazyJitTrampoline) {
|
||||
_PyJitEntryFuncPtr trampoline = compile_trampoline();
|
||||
if (trampoline == NULL) {
|
||||
PyMutex_Unlock(&lazy_jit_mutex);
|
||||
Py_FatalError("Cannot allocate core JIT code");
|
||||
}
|
||||
_Py_jit_entry = trampoline;
|
||||
}
|
||||
PyMutex_Unlock(&lazy_jit_mutex);
|
||||
return _Py_jit_entry(executor, frame, stack_pointer, tstate);
|
||||
}
|
||||
|
||||
void
|
||||
_PyJIT_Free(_PyExecutorObject *executor)
|
||||
{
|
||||
|
|
@ -578,7 +631,6 @@ _PyJIT_Free(_PyExecutorObject *executor)
|
|||
size_t size = executor->jit_size;
|
||||
if (memory) {
|
||||
executor->jit_code = NULL;
|
||||
executor->jit_side_entry = NULL;
|
||||
executor->jit_size = 0;
|
||||
if (jit_free(memory, size)) {
|
||||
PyErr_FormatUnraisable("Exception ignored while "
|
||||
|
|
|
|||
|
|
@ -1238,7 +1238,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
|
|||
#endif
|
||||
#ifdef _Py_JIT
|
||||
executor->jit_code = NULL;
|
||||
executor->jit_side_entry = NULL;
|
||||
executor->jit_size = 0;
|
||||
// This is initialized to true so we can prevent the executor
|
||||
// from being immediately detected as cold and invalidated.
|
||||
|
|
@ -1490,7 +1489,6 @@ _PyExecutor_GetColdExecutor(void)
|
|||
((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT;
|
||||
#ifdef _Py_JIT
|
||||
cold->jit_code = NULL;
|
||||
cold->jit_side_entry = NULL;
|
||||
cold->jit_size = 0;
|
||||
// This is initialized to true so we can prevent the executor
|
||||
// from being immediately detected as cold and invalidated.
|
||||
|
|
|
|||
|
|
@ -494,6 +494,11 @@ free_interpreter(PyInterpreterState *interp)
|
|||
static inline int check_interpreter_whence(long);
|
||||
#endif
|
||||
|
||||
extern _Py_CODEUNIT *
|
||||
_Py_LazyJitTrampoline(
|
||||
struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
|
||||
);
|
||||
|
||||
/* Get the interpreter state to a minimal consistent state.
|
||||
Further init happens in pylifecycle.c before it can be used.
|
||||
All fields not initialized here are expected to be zeroed out,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue