mirror of
https://github.com/python/cpython.git
synced 2025-11-02 06:31:29 +00:00
GH-137959: Replace shim code in jitted code with a single trampoline function. (GH-137961)
This commit is contained in:
parent
c056a089d8
commit
a8d9d94784
17 changed files with 166 additions and 104 deletions
|
|
@ -123,6 +123,22 @@ _PyEval_EvalFrame(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwfl
|
||||||
return tstate->interp->eval_frame(tstate, frame, throwflag);
|
return tstate->interp->eval_frame(tstate, frame, throwflag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _Py_TIER2
|
||||||
|
#ifdef _Py_JIT
|
||||||
|
_Py_CODEUNIT *_Py_LazyJitTrampoline(
|
||||||
|
struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
|
||||||
|
_PyStackRef *stack_pointer, PyThreadState *tstate
|
||||||
|
);
|
||||||
|
#else
|
||||||
|
_Py_CODEUNIT *_PyTier2Interpreter(
|
||||||
|
struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
|
||||||
|
_PyStackRef *stack_pointer, PyThreadState *tstate
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern _PyJitEntryFuncPtr _Py_jit_entry;
|
||||||
|
|
||||||
extern PyObject*
|
extern PyObject*
|
||||||
_PyEval_Vector(PyThreadState *tstate,
|
_PyEval_Vector(PyThreadState *tstate,
|
||||||
PyFunctionObject *func, PyObject *locals,
|
PyFunctionObject *func, PyObject *locals,
|
||||||
|
|
|
||||||
|
|
@ -765,6 +765,7 @@ struct _Py_unique_id_pool {
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
typedef _Py_CODEUNIT *(*_PyJitEntryFuncPtr)(struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate);
|
||||||
|
|
||||||
/* PyInterpreterState holds the global state for one of the runtime's
|
/* PyInterpreterState holds the global state for one of the runtime's
|
||||||
interpreters. Typically the initial (main) interpreter is the only one.
|
interpreters. Typically the initial (main) interpreter is the only one.
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,6 @@ typedef struct _PyExecutorObject {
|
||||||
uint32_t code_size;
|
uint32_t code_size;
|
||||||
size_t jit_size;
|
size_t jit_size;
|
||||||
void *jit_code;
|
void *jit_code;
|
||||||
void *jit_side_entry;
|
|
||||||
_PyExitData exits[1];
|
_PyExitData exits[1];
|
||||||
} _PyExecutorObject;
|
} _PyExecutorObject;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
Replace the shim code added to every piece of jitted code with a single
|
||||||
|
trampoline function.
|
||||||
|
|
@ -2971,7 +2971,7 @@ dummy_func(
|
||||||
assert(tstate->current_executor == NULL);
|
assert(tstate->current_executor == NULL);
|
||||||
assert(executor != tstate->interp->cold_executor);
|
assert(executor != tstate->interp->cold_executor);
|
||||||
tstate->jit_exit = NULL;
|
tstate->jit_exit = NULL;
|
||||||
GOTO_TIER_TWO(executor);
|
TIER1_TO_TIER2(executor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -3037,7 +3037,7 @@ dummy_func(
|
||||||
}
|
}
|
||||||
assert(executor != tstate->interp->cold_executor);
|
assert(executor != tstate->interp->cold_executor);
|
||||||
tstate->jit_exit = NULL;
|
tstate->jit_exit = NULL;
|
||||||
GOTO_TIER_TWO(executor);
|
TIER1_TO_TIER2(executor);
|
||||||
#else
|
#else
|
||||||
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
|
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
|
||||||
#endif /* _Py_TIER2 */
|
#endif /* _Py_TIER2 */
|
||||||
|
|
@ -5257,7 +5257,7 @@ dummy_func(
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
tstate->jit_exit = exit;
|
tstate->jit_exit = exit;
|
||||||
GOTO_TIER_TWO(exit->executor);
|
TIER2_TO_TIER2(exit->executor);
|
||||||
}
|
}
|
||||||
|
|
||||||
tier2 op(_CHECK_VALIDITY, (--)) {
|
tier2 op(_CHECK_VALIDITY, (--)) {
|
||||||
|
|
@ -5353,7 +5353,7 @@ dummy_func(
|
||||||
|
|
||||||
tier2 op(_START_EXECUTOR, (executor/4 --)) {
|
tier2 op(_START_EXECUTOR, (executor/4 --)) {
|
||||||
#ifndef _Py_JIT
|
#ifndef _Py_JIT
|
||||||
current_executor = (_PyExecutorObject*)executor;
|
assert(current_executor == (_PyExecutorObject*)executor);
|
||||||
#endif
|
#endif
|
||||||
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
|
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
|
||||||
tstate->current_executor = (PyObject *)executor;
|
tstate->current_executor = (PyObject *)executor;
|
||||||
|
|
@ -5434,7 +5434,7 @@ dummy_func(
|
||||||
}
|
}
|
||||||
assert(tstate->jit_exit == exit);
|
assert(tstate->jit_exit == exit);
|
||||||
exit->executor = executor;
|
exit->executor = executor;
|
||||||
GOTO_TIER_TWO(exit->executor);
|
TIER2_TO_TIER2(exit->executor);
|
||||||
}
|
}
|
||||||
|
|
||||||
label(pop_2_error) {
|
label(pop_2_error) {
|
||||||
|
|
|
||||||
|
|
@ -275,7 +275,8 @@ maybe_lltrace_resume_frame(_PyInterpreterFrame *frame, PyObject *globals)
|
||||||
}
|
}
|
||||||
int r = PyDict_Contains(globals, &_Py_ID(__lltrace__));
|
int r = PyDict_Contains(globals, &_Py_ID(__lltrace__));
|
||||||
if (r < 0) {
|
if (r < 0) {
|
||||||
return -1;
|
PyErr_Clear();
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
int lltrace = r * 5; // Levels 1-4 only trace uops
|
int lltrace = r * 5; // Levels 1-4 only trace uops
|
||||||
if (!lltrace) {
|
if (!lltrace) {
|
||||||
|
|
@ -1109,11 +1110,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_Py_TIER2) && !defined(_Py_JIT)
|
|
||||||
/* Tier 2 interpreter state */
|
|
||||||
_PyExecutorObject *current_executor = NULL;
|
|
||||||
const _PyUOpInstruction *next_uop = NULL;
|
|
||||||
#endif
|
|
||||||
#if Py_TAIL_CALL_INTERP
|
#if Py_TAIL_CALL_INTERP
|
||||||
# if Py_STATS
|
# if Py_STATS
|
||||||
return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, 0, lastopcode);
|
return _TAIL_CALL_start_frame(frame, NULL, tstate, NULL, 0, lastopcode);
|
||||||
|
|
@ -1126,14 +1122,41 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
early_exit:
|
||||||
|
assert(_PyErr_Occurred(tstate));
|
||||||
|
_Py_LeaveRecursiveCallPy(tstate);
|
||||||
|
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
|
||||||
|
// GH-99729: We need to unlink the frame *before* clearing it:
|
||||||
|
_PyInterpreterFrame *dying = frame;
|
||||||
|
frame = tstate->current_frame = dying->previous;
|
||||||
|
_PyEval_FrameClearAndPop(tstate, dying);
|
||||||
|
frame->return_offset = 0;
|
||||||
|
assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
|
||||||
|
/* Restore previous frame and exit */
|
||||||
|
tstate->current_frame = frame->previous;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
#ifdef _Py_TIER2
|
#ifdef _Py_TIER2
|
||||||
|
|
||||||
// Tier 2 is also here!
|
|
||||||
enter_tier_two:
|
|
||||||
|
|
||||||
#ifdef _Py_JIT
|
#ifdef _Py_JIT
|
||||||
assert(0);
|
_PyJitEntryFuncPtr _Py_jit_entry = _Py_LazyJitTrampoline;
|
||||||
#else
|
#else
|
||||||
|
_PyJitEntryFuncPtr _Py_jit_entry = _PyTier2Interpreter;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_Py_TIER2) && !defined(_Py_JIT)
|
||||||
|
|
||||||
|
_Py_CODEUNIT *
|
||||||
|
_PyTier2Interpreter(
|
||||||
|
_PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
|
||||||
|
_PyStackRef *stack_pointer, PyThreadState *tstate
|
||||||
|
) {
|
||||||
|
const _PyUOpInstruction *next_uop;
|
||||||
|
int oparg;
|
||||||
|
tier2_start:
|
||||||
|
|
||||||
|
next_uop = current_executor->trace;
|
||||||
|
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT);
|
||||||
|
|
||||||
#undef LOAD_IP
|
#undef LOAD_IP
|
||||||
#define LOAD_IP(UNUSED) (void)0
|
#define LOAD_IP(UNUSED) (void)0
|
||||||
|
|
@ -1151,7 +1174,6 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
||||||
#undef ENABLE_SPECIALIZATION_FT
|
#undef ENABLE_SPECIALIZATION_FT
|
||||||
#define ENABLE_SPECIALIZATION_FT 0
|
#define ENABLE_SPECIALIZATION_FT 0
|
||||||
|
|
||||||
; // dummy statement after a label, before a declaration
|
|
||||||
uint16_t uopcode;
|
uint16_t uopcode;
|
||||||
#ifdef Py_STATS
|
#ifdef Py_STATS
|
||||||
int lastuop = 0;
|
int lastuop = 0;
|
||||||
|
|
@ -1225,24 +1247,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
|
||||||
next_uop = current_executor->trace + target;
|
next_uop = current_executor->trace + target;
|
||||||
goto tier2_dispatch;
|
goto tier2_dispatch;
|
||||||
|
|
||||||
#endif // _Py_JIT
|
}
|
||||||
|
|
||||||
#endif // _Py_TIER2
|
#endif // _Py_TIER2
|
||||||
|
|
||||||
early_exit:
|
|
||||||
assert(_PyErr_Occurred(tstate));
|
|
||||||
_Py_LeaveRecursiveCallPy(tstate);
|
|
||||||
assert(frame->owner != FRAME_OWNED_BY_INTERPRETER);
|
|
||||||
// GH-99729: We need to unlink the frame *before* clearing it:
|
|
||||||
_PyInterpreterFrame *dying = frame;
|
|
||||||
frame = tstate->current_frame = dying->previous;
|
|
||||||
_PyEval_FrameClearAndPop(tstate, dying);
|
|
||||||
frame->return_offset = 0;
|
|
||||||
assert(frame->owner == FRAME_OWNED_BY_INTERPRETER);
|
|
||||||
/* Restore previous frame and exit */
|
|
||||||
tstate->current_frame = frame->previous;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DO_NOT_OPTIMIZE_INTERP_LOOP
|
#ifdef DO_NOT_OPTIMIZE_INTERP_LOOP
|
||||||
# pragma optimize("", on)
|
# pragma optimize("", on)
|
||||||
|
|
|
||||||
|
|
@ -133,9 +133,6 @@ do { \
|
||||||
_PyFrame_SetStackPointer(frame, stack_pointer); \
|
_PyFrame_SetStackPointer(frame, stack_pointer); \
|
||||||
int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
|
int lltrace = maybe_lltrace_resume_frame(frame, GLOBALS()); \
|
||||||
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
||||||
if (lltrace < 0) { \
|
|
||||||
JUMP_TO_LABEL(exit_unwind); \
|
|
||||||
} \
|
|
||||||
frame->lltrace = lltrace; \
|
frame->lltrace = lltrace; \
|
||||||
} while (0)
|
} while (0)
|
||||||
#else
|
#else
|
||||||
|
|
@ -354,16 +351,10 @@ _PyFrame_SetStackPointer(frame, stack_pointer)
|
||||||
|
|
||||||
/* Tier-switching macros. */
|
/* Tier-switching macros. */
|
||||||
|
|
||||||
#ifdef _Py_JIT
|
#define TIER1_TO_TIER2(EXECUTOR) \
|
||||||
#define GOTO_TIER_TWO(EXECUTOR) \
|
|
||||||
do { \
|
do { \
|
||||||
OPT_STAT_INC(traces_executed); \
|
OPT_STAT_INC(traces_executed); \
|
||||||
_PyExecutorObject *_executor = (EXECUTOR); \
|
next_instr = _Py_jit_entry((EXECUTOR), frame, stack_pointer, tstate); \
|
||||||
jit_func jitted = _executor->jit_code; \
|
|
||||||
/* Keep the shim frame alive via the executor: */ \
|
|
||||||
Py_INCREF(_executor); \
|
|
||||||
next_instr = jitted(frame, stack_pointer, tstate); \
|
|
||||||
Py_DECREF(_executor); \
|
|
||||||
frame = tstate->current_frame; \
|
frame = tstate->current_frame; \
|
||||||
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
||||||
if (next_instr == NULL) { \
|
if (next_instr == NULL) { \
|
||||||
|
|
@ -372,31 +363,21 @@ do { \
|
||||||
} \
|
} \
|
||||||
DISPATCH(); \
|
DISPATCH(); \
|
||||||
} while (0)
|
} while (0)
|
||||||
#else
|
|
||||||
#define GOTO_TIER_TWO(EXECUTOR) \
|
#define TIER2_TO_TIER2(EXECUTOR) \
|
||||||
do { \
|
do { \
|
||||||
OPT_STAT_INC(traces_executed); \
|
OPT_STAT_INC(traces_executed); \
|
||||||
_PyExecutorObject *_executor = (EXECUTOR); \
|
current_executor = (EXECUTOR); \
|
||||||
next_uop = _executor->trace; \
|
goto tier2_start; \
|
||||||
assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); \
|
|
||||||
goto enter_tier_two; \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GOTO_TIER_ONE(TARGET) \
|
#define GOTO_TIER_ONE(TARGET) \
|
||||||
do \
|
do \
|
||||||
{ \
|
{ \
|
||||||
tstate->current_executor = NULL; \
|
tstate->current_executor = NULL; \
|
||||||
next_instr = (TARGET); \
|
|
||||||
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
|
OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); \
|
||||||
_PyFrame_SetStackPointer(frame, stack_pointer); \
|
_PyFrame_SetStackPointer(frame, stack_pointer); \
|
||||||
stack_pointer = _PyFrame_GetStackPointer(frame); \
|
return TARGET; \
|
||||||
if (next_instr == NULL) \
|
|
||||||
{ \
|
|
||||||
next_instr = frame->instr_ptr; \
|
|
||||||
goto error; \
|
|
||||||
} \
|
|
||||||
DISPATCH(); \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define CURRENT_OPARG() (next_uop[-1].oparg)
|
#define CURRENT_OPARG() (next_uop[-1].oparg)
|
||||||
|
|
|
||||||
6
Python/executor_cases.c.h
generated
6
Python/executor_cases.c.h
generated
|
|
@ -7122,7 +7122,7 @@
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
tstate->jit_exit = exit;
|
tstate->jit_exit = exit;
|
||||||
GOTO_TIER_TWO(exit->executor);
|
TIER2_TO_TIER2(exit->executor);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -7400,7 +7400,7 @@
|
||||||
case _START_EXECUTOR: {
|
case _START_EXECUTOR: {
|
||||||
PyObject *executor = (PyObject *)CURRENT_OPERAND0();
|
PyObject *executor = (PyObject *)CURRENT_OPERAND0();
|
||||||
#ifndef _Py_JIT
|
#ifndef _Py_JIT
|
||||||
current_executor = (_PyExecutorObject*)executor;
|
assert(current_executor == (_PyExecutorObject*)executor);
|
||||||
#endif
|
#endif
|
||||||
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
|
assert(tstate->jit_exit == NULL || tstate->jit_exit->executor == current_executor);
|
||||||
tstate->current_executor = (PyObject *)executor;
|
tstate->current_executor = (PyObject *)executor;
|
||||||
|
|
@ -7503,7 +7503,7 @@
|
||||||
}
|
}
|
||||||
assert(tstate->jit_exit == exit);
|
assert(tstate->jit_exit == exit);
|
||||||
exit->executor = executor;
|
exit->executor = executor;
|
||||||
GOTO_TIER_TWO(exit->executor);
|
TIER2_TO_TIER2(exit->executor);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
4
Python/generated_cases.c.h
generated
4
Python/generated_cases.c.h
generated
|
|
@ -5493,7 +5493,7 @@
|
||||||
}
|
}
|
||||||
assert(executor != tstate->interp->cold_executor);
|
assert(executor != tstate->interp->cold_executor);
|
||||||
tstate->jit_exit = NULL;
|
tstate->jit_exit = NULL;
|
||||||
GOTO_TIER_TWO(executor);
|
TIER1_TO_TIER2(executor);
|
||||||
#else
|
#else
|
||||||
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
|
Py_FatalError("ENTER_EXECUTOR is not supported in this build");
|
||||||
#endif /* _Py_TIER2 */
|
#endif /* _Py_TIER2 */
|
||||||
|
|
@ -7667,7 +7667,7 @@
|
||||||
assert(tstate->current_executor == NULL);
|
assert(tstate->current_executor == NULL);
|
||||||
assert(executor != tstate->interp->cold_executor);
|
assert(executor != tstate->interp->cold_executor);
|
||||||
tstate->jit_exit = NULL;
|
tstate->jit_exit = NULL;
|
||||||
GOTO_TIER_TWO(executor);
|
TIER1_TO_TIER2(executor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
78
Python/jit.c
78
Python/jit.c
|
|
@ -494,10 +494,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
||||||
size_t code_size = 0;
|
size_t code_size = 0;
|
||||||
size_t data_size = 0;
|
size_t data_size = 0;
|
||||||
jit_state state = {0};
|
jit_state state = {0};
|
||||||
group = &shim;
|
|
||||||
code_size += group->code_size;
|
|
||||||
data_size += group->data_size;
|
|
||||||
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
|
|
||||||
for (size_t i = 0; i < length; i++) {
|
for (size_t i = 0; i < length; i++) {
|
||||||
const _PyUOpInstruction *instruction = &trace[i];
|
const _PyUOpInstruction *instruction = &trace[i];
|
||||||
group = &stencil_groups[instruction->opcode];
|
group = &stencil_groups[instruction->opcode];
|
||||||
|
|
@ -539,13 +535,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
||||||
unsigned char *code = memory;
|
unsigned char *code = memory;
|
||||||
state.trampolines.mem = memory + code_size;
|
state.trampolines.mem = memory + code_size;
|
||||||
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
|
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
|
||||||
// Compile the shim, which handles converting between the native
|
|
||||||
// calling convention and the calling convention used by jitted code
|
|
||||||
// (which may be different for efficiency reasons).
|
|
||||||
group = &shim;
|
|
||||||
group->emit(code, data, executor, NULL, &state);
|
|
||||||
code += group->code_size;
|
|
||||||
data += group->data_size;
|
|
||||||
assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT);
|
assert(trace[0].opcode == _START_EXECUTOR || trace[0].opcode == _COLD_EXIT);
|
||||||
for (size_t i = 0; i < length; i++) {
|
for (size_t i = 0; i < length; i++) {
|
||||||
const _PyUOpInstruction *instruction = &trace[i];
|
const _PyUOpInstruction *instruction = &trace[i];
|
||||||
|
|
@ -566,11 +555,75 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
executor->jit_code = memory;
|
executor->jit_code = memory;
|
||||||
executor->jit_side_entry = memory + shim.code_size;
|
|
||||||
executor->jit_size = total_size;
|
executor->jit_size = total_size;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* One-off compilation of the jit entry trampoline
|
||||||
|
* We compile this only once, as it is effectively a normal
|
||||||
|
* function, but we need to use the JIT because it needs
|
||||||
|
* to understand the jit-specific calling convention.
|
||||||
|
*/
|
||||||
|
static _PyJitEntryFuncPtr
|
||||||
|
compile_trampoline(void)
|
||||||
|
{
|
||||||
|
_PyExecutorObject dummy;
|
||||||
|
const StencilGroup *group;
|
||||||
|
size_t code_size = 0;
|
||||||
|
size_t data_size = 0;
|
||||||
|
jit_state state = {0};
|
||||||
|
group = &trampoline;
|
||||||
|
code_size += group->code_size;
|
||||||
|
data_size += group->data_size;
|
||||||
|
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
|
||||||
|
// Round up to the nearest page:
|
||||||
|
size_t page_size = get_page_size();
|
||||||
|
assert((page_size & (page_size - 1)) == 0);
|
||||||
|
size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
|
||||||
|
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size) & (page_size - 1));
|
||||||
|
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + padding;
|
||||||
|
unsigned char *memory = jit_alloc(total_size);
|
||||||
|
if (memory == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
unsigned char *code = memory;
|
||||||
|
state.trampolines.mem = memory + code_size;
|
||||||
|
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
|
||||||
|
// Compile the trampoline, which handles converting between the native
|
||||||
|
// calling convention and the calling convention used by jitted code
|
||||||
|
// (which may be different for efficiency reasons).
|
||||||
|
group = &trampoline;
|
||||||
|
group->emit(code, data, &dummy, NULL, &state);
|
||||||
|
code += group->code_size;
|
||||||
|
data += group->data_size;
|
||||||
|
assert(code == memory + code_size);
|
||||||
|
assert(data == memory + code_size + state.trampolines.size + code_padding + data_size);
|
||||||
|
if (mark_executable(memory, total_size)) {
|
||||||
|
jit_free(memory, total_size);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return (_PyJitEntryFuncPtr)memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyMutex lazy_jit_mutex = { 0 };
|
||||||
|
|
||||||
|
_Py_CODEUNIT *
|
||||||
|
_Py_LazyJitTrampoline(
|
||||||
|
_PyExecutorObject *executor, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
|
||||||
|
) {
|
||||||
|
PyMutex_Lock(&lazy_jit_mutex);
|
||||||
|
if (_Py_jit_entry == _Py_LazyJitTrampoline) {
|
||||||
|
_PyJitEntryFuncPtr trampoline = compile_trampoline();
|
||||||
|
if (trampoline == NULL) {
|
||||||
|
PyMutex_Unlock(&lazy_jit_mutex);
|
||||||
|
Py_FatalError("Cannot allocate core JIT code");
|
||||||
|
}
|
||||||
|
_Py_jit_entry = trampoline;
|
||||||
|
}
|
||||||
|
PyMutex_Unlock(&lazy_jit_mutex);
|
||||||
|
return _Py_jit_entry(executor, frame, stack_pointer, tstate);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyJIT_Free(_PyExecutorObject *executor)
|
_PyJIT_Free(_PyExecutorObject *executor)
|
||||||
{
|
{
|
||||||
|
|
@ -578,7 +631,6 @@ _PyJIT_Free(_PyExecutorObject *executor)
|
||||||
size_t size = executor->jit_size;
|
size_t size = executor->jit_size;
|
||||||
if (memory) {
|
if (memory) {
|
||||||
executor->jit_code = NULL;
|
executor->jit_code = NULL;
|
||||||
executor->jit_side_entry = NULL;
|
|
||||||
executor->jit_size = 0;
|
executor->jit_size = 0;
|
||||||
if (jit_free(memory, size)) {
|
if (jit_free(memory, size)) {
|
||||||
PyErr_FormatUnraisable("Exception ignored while "
|
PyErr_FormatUnraisable("Exception ignored while "
|
||||||
|
|
|
||||||
|
|
@ -1238,7 +1238,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
|
||||||
#endif
|
#endif
|
||||||
#ifdef _Py_JIT
|
#ifdef _Py_JIT
|
||||||
executor->jit_code = NULL;
|
executor->jit_code = NULL;
|
||||||
executor->jit_side_entry = NULL;
|
|
||||||
executor->jit_size = 0;
|
executor->jit_size = 0;
|
||||||
// This is initialized to true so we can prevent the executor
|
// This is initialized to true so we can prevent the executor
|
||||||
// from being immediately detected as cold and invalidated.
|
// from being immediately detected as cold and invalidated.
|
||||||
|
|
@ -1490,7 +1489,6 @@ _PyExecutor_GetColdExecutor(void)
|
||||||
((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT;
|
((_PyUOpInstruction *)cold->trace)->opcode = _COLD_EXIT;
|
||||||
#ifdef _Py_JIT
|
#ifdef _Py_JIT
|
||||||
cold->jit_code = NULL;
|
cold->jit_code = NULL;
|
||||||
cold->jit_side_entry = NULL;
|
|
||||||
cold->jit_size = 0;
|
cold->jit_size = 0;
|
||||||
// This is initialized to true so we can prevent the executor
|
// This is initialized to true so we can prevent the executor
|
||||||
// from being immediately detected as cold and invalidated.
|
// from being immediately detected as cold and invalidated.
|
||||||
|
|
|
||||||
|
|
@ -494,6 +494,11 @@ free_interpreter(PyInterpreterState *interp)
|
||||||
static inline int check_interpreter_whence(long);
|
static inline int check_interpreter_whence(long);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
extern _Py_CODEUNIT *
|
||||||
|
_Py_LazyJitTrampoline(
|
||||||
|
struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
|
||||||
|
);
|
||||||
|
|
||||||
/* Get the interpreter state to a minimal consistent state.
|
/* Get the interpreter state to a minimal consistent state.
|
||||||
Further init happens in pylifecycle.c before it can be used.
|
Further init happens in pylifecycle.c before it can be used.
|
||||||
All fields not initialized here are expected to be zeroed out,
|
All fields not initialized here are expected to be zeroed out,
|
||||||
|
|
|
||||||
|
|
@ -191,8 +191,8 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
|
||||||
with tempfile.TemporaryDirectory() as tempdir:
|
with tempfile.TemporaryDirectory() as tempdir:
|
||||||
work = pathlib.Path(tempdir).resolve()
|
work = pathlib.Path(tempdir).resolve()
|
||||||
async with asyncio.TaskGroup() as group:
|
async with asyncio.TaskGroup() as group:
|
||||||
coro = self._compile("shim", TOOLS_JIT / "shim.c", work)
|
coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
|
||||||
tasks.append(group.create_task(coro, name="shim"))
|
tasks.append(group.create_task(coro, name="trampoline"))
|
||||||
template = TOOLS_JIT_TEMPLATE_C.read_text()
|
template = TOOLS_JIT_TEMPLATE_C.read_text()
|
||||||
for case, opname in cases_and_opnames:
|
for case, opname in cases_and_opnames:
|
||||||
# Write out a copy of the template with *only* this case
|
# Write out a copy of the template with *only* this case
|
||||||
|
|
|
||||||
|
|
@ -22,11 +22,11 @@ def _dump_footer(
|
||||||
yield " symbol_mask trampoline_mask;"
|
yield " symbol_mask trampoline_mask;"
|
||||||
yield "} StencilGroup;"
|
yield "} StencilGroup;"
|
||||||
yield ""
|
yield ""
|
||||||
yield f"static const StencilGroup shim = {groups['shim'].as_c('shim')};"
|
yield f"static const StencilGroup trampoline = {groups['trampoline'].as_c('trampoline')};"
|
||||||
yield ""
|
yield ""
|
||||||
yield "static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = {"
|
yield "static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = {"
|
||||||
for opname, group in sorted(groups.items()):
|
for opname, group in sorted(groups.items()):
|
||||||
if opname == "shim":
|
if opname == "trampoline":
|
||||||
continue
|
continue
|
||||||
yield f" [{opname}] = {group.as_c(opname)},"
|
yield f" [{opname}] = {group.as_c(opname)},"
|
||||||
yield "};"
|
yield "};"
|
||||||
|
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
#include "Python.h"
|
|
||||||
|
|
||||||
#include "pycore_ceval.h"
|
|
||||||
#include "pycore_frame.h"
|
|
||||||
#include "pycore_jit.h"
|
|
||||||
|
|
||||||
#include "jit.h"
|
|
||||||
|
|
||||||
_Py_CODEUNIT *
|
|
||||||
_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
|
|
||||||
{
|
|
||||||
// Note that this is *not* a tail call:
|
|
||||||
DECLARE_TARGET(_JIT_CONTINUE);
|
|
||||||
return _JIT_CONTINUE(frame, stack_pointer, tstate);
|
|
||||||
}
|
|
||||||
|
|
@ -46,12 +46,12 @@
|
||||||
#undef CURRENT_TARGET
|
#undef CURRENT_TARGET
|
||||||
#define CURRENT_TARGET() (_target)
|
#define CURRENT_TARGET() (_target)
|
||||||
|
|
||||||
#undef GOTO_TIER_TWO
|
#undef TIER2_TO_TIER2
|
||||||
#define GOTO_TIER_TWO(EXECUTOR) \
|
#define TIER2_TO_TIER2(EXECUTOR) \
|
||||||
do { \
|
do { \
|
||||||
OPT_STAT_INC(traces_executed); \
|
OPT_STAT_INC(traces_executed); \
|
||||||
_PyExecutorObject *_executor = (EXECUTOR); \
|
_PyExecutorObject *_executor = (EXECUTOR); \
|
||||||
jit_func_preserve_none jitted = _executor->jit_side_entry; \
|
jit_func_preserve_none jitted = _executor->jit_code; \
|
||||||
__attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \
|
__attribute__((musttail)) return jitted(frame, stack_pointer, tstate); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
|
||||||
16
Tools/jit/trampoline.c
Normal file
16
Tools/jit/trampoline.c
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
#include "Python.h"
|
||||||
|
|
||||||
|
#include "pycore_ceval.h"
|
||||||
|
#include "pycore_frame.h"
|
||||||
|
#include "pycore_jit.h"
|
||||||
|
|
||||||
|
#include "jit.h"
|
||||||
|
|
||||||
|
_Py_CODEUNIT *
|
||||||
|
_JIT_ENTRY(
|
||||||
|
_PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
|
||||||
|
) {
|
||||||
|
typedef DECLARE_TARGET((*jit_func));
|
||||||
|
jit_func jitted = (jit_func)exec->jit_code;
|
||||||
|
return jitted(frame, stack_pointer, tstate);
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue