GH-139109: Partial reworking of JIT data structures (GH-144105)
* Halve size of buffers by reusing combined trace + optimizer buffers for TOS caching
* Add simple buffer struct for more maintainable handling of buffers
* Decouple JIT structs from thread state struct
* Ensure terminator is added to trace, when optimizer gives up
Parent: fb690c38ca
Commit: d77aaa7311
10 changed files with 228 additions and 215 deletions
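The hunks below use a new _PyJitUopBuffer struct and a family of uop_buffer_* helpers whose definitions are not part of this excerpt (they presumably live alongside UOP_BUFFER_SIZE in pycore_uop.h). As orientation only, here is a minimal sketch of a buffer type that would be consistent with how start, next, and end are used in the diff; the field layout and helper signatures are assumptions inferred from usage, not the committed definitions.

// Illustrative sketch only, inferred from how the diff below uses the buffer;
// not the actual CPython definition.
typedef struct {
    _PyUOpInstruction *start;  // first slot of the backing uop array
    _PyUOpInstruction *next;   // where the next uop will be written
    _PyUOpInstruction *end;    // one past the last usable slot
} _PyJitUopBuffer;

static inline void
uop_buffer_init(_PyJitUopBuffer *buf, _PyUOpInstruction *mem, int capacity)
{
    buf->start = mem;
    buf->next = mem;
    buf->end = mem + capacity;
}

static inline int
uop_buffer_length(const _PyJitUopBuffer *buf)
{
    // Number of uops written so far
    return (int)(buf->next - buf->start);
}

static inline _PyUOpInstruction *
uop_buffer_last(const _PyJitUopBuffer *buf)
{
    // Most recently written uop
    return buf->next - 1;
}

static inline int
uop_buffer_remaining_space(const _PyJitUopBuffer *buf)
{
    // Slots still available before the (possibly shrunk) end
    return (int)(buf->end - buf->next);
}

With this layout, appending a uop is a write through next followed by next++, and reserving room for exit/error stubs is done by decrementing end (the trace->end-- lines in the hunks below), so a single remaining-space check accounts for both.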
@@ -433,7 +433,7 @@ do { \
 JUMP_TO_LABEL(error); \
 } \
 if (keep_tracing_bit) { \
-assert(((_PyThreadStateImpl *)tstate)->jit_tracer_state->prev_state.code_curr_size == 2); \
+assert(uop_buffer_length(&((_PyThreadStateImpl *)tstate)->jit_tracer_state->code_buffer)); \
 ENTER_TRACING(); \
 DISPATCH_NON_TRACING(); \
 } \
@@ -188,9 +188,6 @@ _PyOptimizer_Optimize(
 }
 insert_executor(code, start, index, executor);
 }
-else {
-executor->vm_data.code = NULL;
-}
 executor->vm_data.chain_depth = chain_depth;
 assert(executor->vm_data.valid);
 _PyExitData *exit = _tstate->jit_tracer_state->initial_state.exit;
@@ -547,52 +544,43 @@ guard_ip_uop[MAX_UOP_ID + 1] = {
 #endif

-static inline int
+static inline void
 add_to_trace(
-_PyUOpInstruction *trace,
-int trace_length,
+_PyJitUopBuffer *trace,
 uint16_t opcode,
 uint16_t oparg,
 uint64_t operand,
 uint32_t target)
 {
-trace[trace_length].opcode = opcode;
-trace[trace_length].format = UOP_FORMAT_TARGET;
-trace[trace_length].target = target;
-trace[trace_length].oparg = oparg;
-trace[trace_length].operand0 = operand;
+_PyUOpInstruction *inst = trace->next;
+inst->opcode = opcode;
+inst->format = UOP_FORMAT_TARGET;
+inst->target = target;
+inst->oparg = oparg;
+inst->operand0 = operand;
 #ifdef Py_STATS
-trace[trace_length].execution_count = 0;
+inst->execution_count = 0;
 #endif
-return trace_length + 1;
+trace->next++;
 }

 #ifdef Py_DEBUG
 #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
-assert(trace_length < max_length); \
-trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
+add_to_trace(trace, (OPCODE), (OPARG), (OPERAND), (TARGET)); \
 if (lltrace >= 2) { \
-printf("%4d ADD_TO_TRACE: ", trace_length); \
-_PyUOpPrint(&trace[trace_length-1]); \
+printf("%4d ADD_TO_TRACE: ", uop_buffer_length(trace)); \
+_PyUOpPrint(uop_buffer_last(trace)); \
 printf("\n"); \
 }
 #else
 #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \
-assert(trace_length < max_length); \
-trace_length = add_to_trace(trace, trace_length, (OPCODE), (OPARG), (OPERAND), (TARGET));
+add_to_trace(trace, (OPCODE), (OPARG), (OPERAND), (TARGET))
 #endif

 #define INSTR_IP(INSTR, CODE) \
 ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive)))

-// Reserve space for n uops
-#define RESERVE_RAW(n, opname) \
-if (trace_length + (n) > max_length) { \
-DPRINTF(2, "No room for %s (need %d, got %d)\n", \
-(opname), (n), max_length - trace_length); \
-OPT_STAT_INC(trace_too_long); \
-goto full; \
-}

 static int
 is_terminator(const _PyUOpInstruction *uop)
@@ -629,9 +617,7 @@ _PyJit_translate_single_bytecode_to_trace(
 PyCodeObject *old_code = tracer->prev_state.instr_code;
 bool progress_needed = (tracer->initial_state.chain_depth % MAX_CHAIN_DEPTH) == 0;
 _PyBloomFilter *dependencies = &tracer->prev_state.dependencies;
-int trace_length = tracer->prev_state.code_curr_size;
-_PyUOpInstruction *trace = tracer->code_buffer;
-int max_length = tracer->prev_state.code_max_size;
+_PyJitUopBuffer *trace = &tracer->code_buffer;

 _Py_CODEUNIT *this_instr = tracer->prev_state.instr;
 _Py_CODEUNIT *target_instr = this_instr;
@@ -670,15 +656,13 @@ _PyJit_translate_single_bytecode_to_trace(
 }
 }

-int old_stack_level = tracer->prev_state.instr_stacklevel;
-
 // Strange control-flow
 bool has_dynamic_jump_taken = OPCODE_HAS_UNPREDICTABLE_JUMP(opcode) &&
 (next_instr != this_instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]);

 /* Special case the first instruction,
 * so that we can guarantee forward progress */
-if (progress_needed && tracer->prev_state.code_curr_size < CODE_SIZE_NO_PROGRESS) {
+if (progress_needed && uop_buffer_length(&tracer->code_buffer) < CODE_SIZE_NO_PROGRESS) {
 if (OPCODE_HAS_EXIT(opcode) || OPCODE_HAS_DEOPT(opcode)) {
 opcode = _PyOpcode_Deopt[opcode];
 }
@@ -694,7 +678,7 @@ _PyJit_translate_single_bytecode_to_trace(

 int is_sys_tracing = (tstate->c_tracefunc != NULL) || (tstate->c_profilefunc != NULL);
 if (is_sys_tracing) {
-goto full;
+goto done;
 }

 if (stop_tracing_opcode == _DEOPT) {
@@ -710,7 +694,7 @@ _PyJit_translate_single_bytecode_to_trace(
 goto done;
 }

-DPRINTF(2, "%p %d: %s(%d) %d %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip, old_stack_level);
+DPRINTF(2, "%p %d: %s(%d) %d\n", old_code, target, _PyOpcode_OpName[opcode], oparg, needs_guard_ip);

 #ifdef Py_DEBUG
 if (oparg > 255) {
@@ -719,7 +703,7 @@ _PyJit_translate_single_bytecode_to_trace(
 #endif

 if (!tracer->prev_state.dependencies_still_valid) {
-goto full;
+goto done;
 }

 // This happens when a recursive call happens that we can't trace. Such as Python -> C -> Python calls
@@ -734,16 +718,14 @@ _PyJit_translate_single_bytecode_to_trace(
 unsupported:
 {
 // Rewind to previous instruction and replace with _EXIT_TRACE.
-_PyUOpInstruction *curr = &trace[trace_length-1];
-while (curr->opcode != _SET_IP && trace_length > 2) {
-trace_length--;
-curr = &trace[trace_length-1];
+_PyUOpInstruction *curr = uop_buffer_last(trace);
+while (curr->opcode != _SET_IP && uop_buffer_length(trace) > 2) {
+trace->next--;
+curr = uop_buffer_last(trace);
 }
-assert(curr->opcode == _SET_IP || trace_length == 2);
+assert(curr->opcode == _SET_IP || uop_buffer_length(trace) == 2);
 if (curr->opcode == _SET_IP) {
 int32_t old_target = (int32_t)uop_get_target(curr);
 curr++;
-trace_length++;
 curr->opcode = _DEOPT;
 curr->format = UOP_FORMAT_TARGET;
 curr->target = old_target;
@@ -752,7 +734,6 @@ _PyJit_translate_single_bytecode_to_trace(
 }
 }
-

 if (opcode == NOP) {
 return 1;
 }
@@ -766,7 +747,7 @@ _PyJit_translate_single_bytecode_to_trace(
 }

 // One for possible _DEOPT, one because _CHECK_VALIDITY itself might _DEOPT
-max_length -= 2;
+trace->end -= 2;

 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -775,18 +756,28 @@ _PyJit_translate_single_bytecode_to_trace(

 if (OPCODE_HAS_EXIT(opcode)) {
-// Make space for side exit and final _EXIT_TRACE:
-max_length--;
+// Make space for side exit
+trace->end--;
 }
 if (OPCODE_HAS_ERROR(opcode)) {
-// Make space for error stub and final _EXIT_TRACE:
-max_length--;
+// Make space for error stub
+trace->end--;
 }
+if (OPCODE_HAS_DEOPT(opcode)) {
+// Make space for side exit
+trace->end--;
+}

 // _GUARD_IP leads to an exit.
-max_length -= needs_guard_ip;
+trace->end -= needs_guard_ip;

-RESERVE_RAW(expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode)), "uop and various checks");
+int space_needed = expansion->nuops + needs_guard_ip + 2 + (!OPCODE_HAS_NO_SAVE_IP(opcode));
+if (uop_buffer_remaining_space(trace) < space_needed) {
+DPRINTF(2, "No room for expansions and guards (need %d, got %d)\n",
+space_needed, uop_buffer_remaining_space(trace));
+OPT_STAT_INC(trace_too_long);
+goto done;
+}

 ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target);
@@ -825,7 +816,7 @@ _PyJit_translate_single_bytecode_to_trace(
 {
 if ((next_instr != tracer->initial_state.close_loop_instr) &&
 (next_instr != tracer->initial_state.start_instr) &&
-tracer->prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS &&
+uop_buffer_length(&tracer->code_buffer) > CODE_SIZE_NO_PROGRESS &&
 // For side exits, we don't want to terminate them early.
 tracer->initial_state.exit == NULL &&
 // These are coroutines, and we want to unroll those usually.
@@ -836,7 +827,7 @@ _PyJit_translate_single_bytecode_to_trace(
 // inner loop might start and let the traces rejoin.
 OPT_STAT_INC(inner_loop);
 ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-trace[trace_length-1].operand1 = true; // is_control_flow
+uop_buffer_last(trace)->operand1 = true; // is_control_flow
 DPRINTF(2, "JUMP_BACKWARD not to top ends trace %p %p %p\n", next_instr,
 tracer->initial_state.close_loop_instr, tracer->initial_state.start_instr);
 goto done;
@@ -913,19 +904,19 @@ _PyJit_translate_single_bytecode_to_trace(
 }
 break;
 case OPERAND1_1:
-assert(trace[trace_length-1].opcode == uop);
+assert(uop_buffer_last(trace)->opcode == uop);
 operand = read_u16(&this_instr[offset].cache);
-trace[trace_length-1].operand1 = operand;
+uop_buffer_last(trace)->operand1 = operand;
 continue;
 case OPERAND1_2:
-assert(trace[trace_length-1].opcode == uop);
+assert(uop_buffer_last(trace)->opcode == uop);
 operand = read_u32(&this_instr[offset].cache);
-trace[trace_length-1].operand1 = operand;
+uop_buffer_last(trace)->operand1 = operand;
 continue;
 case OPERAND1_4:
-assert(trace[trace_length-1].opcode == uop);
+assert(uop_buffer_last(trace)->opcode == uop);
 operand = read_u64(&this_instr[offset].cache);
-trace[trace_length-1].operand1 = operand;
+uop_buffer_last(trace)->operand1 = operand;
 continue;
 default:
 fprintf(stderr,
@@ -955,7 +946,7 @@ _PyJit_translate_single_bytecode_to_trace(
 }
 }
 ADD_TO_TRACE(uop, oparg, operand, target);
-trace[trace_length - 1].operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
+uop_buffer_last(trace)->operand1 = PyStackRef_IsNone(frame->f_executable) ? 2 : ((int)(frame->stackpointer - _PyFrame_Stackbase(frame)));
 break;
 }
 if (uop == _BINARY_OP_INPLACE_ADD_UNICODE) {
@@ -973,9 +964,9 @@ _PyJit_translate_single_bytecode_to_trace(
 } // End switch (opcode)

 if (needs_guard_ip) {
-uint16_t guard_ip = guard_ip_uop[trace[trace_length-1].opcode];
+uint16_t guard_ip = guard_ip_uop[uop_buffer_last(trace)->opcode];
 if (guard_ip == 0) {
-DPRINTF(1, "Unknown uop needing guard ip %s\n", _PyOpcode_uop_name[trace[trace_length-1].opcode]);
+DPRINTF(1, "Unknown uop needing guard ip %s\n", _PyOpcode_uop_name[uop_buffer_last(trace)->opcode]);
 Py_UNREACHABLE();
 }
 ADD_TO_TRACE(guard_ip, 0, (uintptr_t)next_instr, 0);
@@ -983,7 +974,7 @@ _PyJit_translate_single_bytecode_to_trace(
 // Loop back to the start
 int is_first_instr = tracer->initial_state.close_loop_instr == next_instr ||
 tracer->initial_state.start_instr == next_instr;
-if (is_first_instr && tracer->prev_state.code_curr_size > CODE_SIZE_NO_PROGRESS) {
+if (is_first_instr && uop_buffer_length(trace) > CODE_SIZE_NO_PROGRESS) {
 if (needs_guard_ip) {
 ADD_TO_TRACE(_SET_IP, 0, (uintptr_t)next_instr, 0);
 }
@@ -991,27 +982,13 @@ _PyJit_translate_single_bytecode_to_trace(
 goto done;
 }
 DPRINTF(2, "Trace continuing\n");
-tracer->prev_state.code_curr_size = trace_length;
-tracer->prev_state.code_max_size = max_length;
 return 1;
 done:
 DPRINTF(2, "Trace done\n");
-tracer->prev_state.code_curr_size = trace_length;
-tracer->prev_state.code_max_size = max_length;
-return 0;
-full:
-DPRINTF(2, "Trace full\n");
-if (!is_terminator(&tracer->code_buffer[trace_length-1])) {
-// Undo the last few instructions.
-trace_length = tracer->prev_state.code_curr_size;
-max_length = tracer->prev_state.code_max_size;
-// We previously reversed one.
-max_length += 1;
+if (!is_terminator(uop_buffer_last(trace))) {
 ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target);
-trace[trace_length-1].operand1 = true; // is_control_flow
+uop_buffer_last(trace)->operand1 = true; // is_control_flow
 }
-tracer->prev_state.code_curr_size = trace_length;
-tracer->prev_state.code_max_size = max_length;
 return 0;
 }
@@ -1059,11 +1036,12 @@ _PyJit_TryInitializeTracing(
 2 * INSTR_IP(close_loop_instr, code),
 chain_depth);
 #endif
-add_to_trace(tracer->code_buffer, 0, _START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
-add_to_trace(tracer->code_buffer, 1, _MAKE_WARM, 0, 0, 0);
-tracer->prev_state.code_curr_size = CODE_SIZE_EMPTY;
+/* Set up tracing buffer*/
+_PyJitUopBuffer *trace = &tracer->code_buffer;
+uop_buffer_init(trace, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
+ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)start_instr, INSTR_IP(start_instr, code));
+ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0);

-tracer->prev_state.code_max_size = UOP_MAX_TRACE_LENGTH/2;
 tracer->initial_state.start_instr = start_instr;
 tracer->initial_state.close_loop_instr = close_loop_instr;
 tracer->initial_state.code = (PyCodeObject *)Py_NewRef(code);
@@ -1122,8 +1100,7 @@ _PyJit_FinalizeTracing(PyThreadState *tstate, int err)
 Py_CLEAR(tracer->initial_state.func);
 Py_CLEAR(tracer->initial_state.executor);
 Py_CLEAR(tracer->prev_state.instr_code);
-tracer->prev_state.code_curr_size = CODE_SIZE_EMPTY;
-tracer->prev_state.code_max_size = UOP_MAX_TRACE_LENGTH/2 - 1;
+uop_buffer_init(&tracer->code_buffer, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
 tracer->is_tracing = false;
 }
@@ -1137,7 +1114,6 @@ _PyJit_TracerFree(_PyThreadStateImpl *_tstate)
 }

 #undef RESERVE
-#undef RESERVE_RAW
 #undef INSTR_IP
 #undef ADD_TO_TRACE
 #undef DPRINTF
@@ -1467,39 +1443,47 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length)

 static int
-stack_allocate(_PyUOpInstruction *buffer, int length)
+stack_allocate(_PyUOpInstruction *buffer, _PyUOpInstruction *output, int length)
 {
 assert(buffer[0].opcode == _START_EXECUTOR);
-for (int i = length-1; i >= 0; i--) {
-buffer[i*2+1] = buffer[i];
-buffer[i*2].format = UOP_FORMAT_TARGET;
-buffer[i*2].oparg = 0;
-buffer[i*2].target = 0;
+/* The input buffer and output buffers will overlap.
+Make sure that we can move instructions to the output
+without overwriting the input. */
+if (buffer == output) {
+// This can only happen if optimizer has not been run
+for (int i = 0; i < length; i++) {
+buffer[i + UOP_MAX_TRACE_LENGTH] = buffer[i];
 }
+buffer += UOP_MAX_TRACE_LENGTH;
+}
+else {
+assert(output + UOP_MAX_TRACE_LENGTH == buffer);
+}
 int depth = 0;
+_PyUOpInstruction *write = output;
 for (int i = 0; i < length; i++) {
-_PyUOpInstruction *spill_or_reload = &buffer[i*2];
-int uop = buffer[i*2+1].opcode;
+int uop = buffer[i].opcode;
 if (uop == _NOP) {
 // leave _NOPs to be cleaned up later
-spill_or_reload->opcode = _NOP;
 continue;
 }
 int new_depth = _PyUop_Caching[uop].best[depth];
-if (new_depth == depth) {
-spill_or_reload->opcode = _NOP;
-}
-else {
-spill_or_reload->opcode = _PyUop_SpillsAndReloads[depth][new_depth];
+if (new_depth != depth) {
+write->opcode = _PyUop_SpillsAndReloads[depth][new_depth];
+assert(write->opcode != 0);
+write->format = UOP_FORMAT_TARGET;
+write->oparg = 0;
+write->target = 0;
+write++;
 depth = new_depth;
 }
+*write = buffer[i];
 uint16_t new_opcode = _PyUop_Caching[uop].entries[depth].opcode;
 assert(new_opcode != 0);
-assert(spill_or_reload->opcode != 0);
-buffer[i*2+1].opcode = new_opcode;
+write->opcode = new_opcode;
+write++;
 depth = _PyUop_Caching[uop].entries[depth].output;
 }
-return length*2;
+return write - output;
 }

 static int
@@ -1512,28 +1496,28 @@ uop_optimize(
 _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
 assert(_tstate->jit_tracer_state != NULL);
 _PyBloomFilter *dependencies = &_tstate->jit_tracer_state->prev_state.dependencies;
-_PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer;
+_PyUOpInstruction *buffer = _tstate->jit_tracer_state->code_buffer.start;
 OPT_STAT_INC(attempts);
 bool is_noopt = !tstate->interp->opt_config.uops_optimize_enabled;
 int curr_stackentries = _tstate->jit_tracer_state->initial_state.stack_depth;
-int length = _tstate->jit_tracer_state->prev_state.code_curr_size;
+int length = uop_buffer_length(&_tstate->jit_tracer_state->code_buffer);
 if (length <= CODE_SIZE_NO_PROGRESS) {
 return 0;
 }
 assert(length > 0);
-assert(length < UOP_MAX_TRACE_LENGTH/2);
+assert(length < UOP_MAX_TRACE_LENGTH);
 OPT_STAT_INC(traces_created);
 if (!is_noopt) {
+_PyUOpInstruction *output = &_tstate->jit_tracer_state->uop_array[UOP_MAX_TRACE_LENGTH];
 length = _Py_uop_analyze_and_optimize(
-_tstate,
-buffer, length,
-curr_stackentries, dependencies);
+_tstate, buffer, length, curr_stackentries,
+output, dependencies);
 if (length <= 0) {
 return length;
 }
-buffer = _tstate->jit_tracer_state->out_buffer;
+buffer = output;
 }
-assert(length < UOP_MAX_TRACE_LENGTH/2);
+assert(length < UOP_MAX_TRACE_LENGTH);
 assert(length >= 1);
 /* Fix up */
 for (int pc = 0; pc < length; pc++) {
@@ -1549,7 +1533,9 @@ uop_optimize(
 assert(_PyOpcode_uop_name[buffer[pc].opcode]);
 }
 OPT_HIST(effective_trace_length(buffer, length), optimized_trace_length_hist);
-length = stack_allocate(buffer, length);
+_PyUOpInstruction *output = &_tstate->jit_tracer_state->uop_array[0];
+length = stack_allocate(buffer, output, length);
+buffer = output;
 length = prepare_for_execution(buffer, length);
 assert(length <= UOP_MAX_TRACE_LENGTH);
 _PyExecutorObject *executor = make_executor_from_uops(
@@ -1707,6 +1693,7 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s
 {
 executor->vm_data.valid = true;
 executor->vm_data.pending_deletion = 0;
+executor->vm_data.code = NULL;
 for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
 executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
 }
@@ -203,14 +203,14 @@ static inline void
 add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
 uint16_t opcode, uint16_t oparg, uintptr_t operand0)
 {
-_PyUOpInstruction *out = &ctx->out_buffer[ctx->out_len];
+_PyUOpInstruction *out = ctx->out_buffer.next;
 out->opcode = (opcode);
 out->format = this_instr->format;
 out->oparg = (oparg);
 out->target = this_instr->target;
 out->operand0 = (operand0);
 out->operand1 = this_instr->operand1;
-ctx->out_len++;
+ctx->out_buffer.next++;
 }

 /* Shortened forms for convenience, used in optimizer_bytecodes.c */
@@ -430,6 +430,7 @@ optimize_uops(
 _PyUOpInstruction *trace,
 int trace_len,
 int curr_stacklen,
+_PyUOpInstruction *output,
 _PyBloomFilter *dependencies
 )
 {
@@ -440,7 +441,7 @@ optimize_uops(
 JitOptContext *ctx = &tstate->jit_tracer_state->opt_context;
 uint32_t opcode = UINT16_MAX;

-ctx->out_buffer = tstate->jit_tracer_state->out_buffer;
+uop_buffer_init(&ctx->out_buffer, output, UOP_MAX_TRACE_LENGTH);

 // Make sure that watchers are set up
 PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -458,14 +459,20 @@ optimize_uops(
 ctx->curr_frame_depth++;
 ctx->frame = frame;

-ctx->out_len = 0;
-
 _PyUOpInstruction *this_instr = NULL;
 JitOptRef *stack_pointer = ctx->frame->stack_pointer;

-for (int i = 0; !ctx->done; i++) {
-assert(i < trace_len);
+for (int i = 0; i < trace_len; i++) {
 this_instr = &trace[i];
+if (ctx->done) {
+// Don't do any more optimization, but
+// we still need to reach a terminator for corrctness.
+*(ctx->out_buffer.next++) = *this_instr;
+if (is_terminator_uop(this_instr)) {
+break;
+}
+continue;
+}

 int oparg = this_instr->oparg;
 opcode = this_instr->opcode;
@@ -485,6 +492,8 @@ optimize_uops(
 }
 #endif

+_PyUOpInstruction *out_ptr = ctx->out_buffer.next;
+
 switch (opcode) {

 #include "optimizer_cases.c.h"
@@ -494,8 +503,8 @@ optimize_uops(
 Py_UNREACHABLE();
 }
 // If no ADD_OP was called during this iteration, copy the original instruction
-if (ctx->out_len == i) {
-ctx->out_buffer[ctx->out_len++] = *this_instr;
+if (ctx->out_buffer.next == out_ptr) {
+*(ctx->out_buffer.next++) = *this_instr;
 }
 assert(ctx->frame != NULL);
 if (!CURRENT_FRAME_IS_INIT_SHIM()) {
@@ -526,20 +535,11 @@ optimize_uops(
 * would be no benefit in retrying later */
 _Py_uop_abstractcontext_fini(ctx);
 // Check that the trace ends with a proper terminator
-if (ctx->out_len > 0) {
-_PyUOpInstruction *last_uop = &ctx->out_buffer[ctx->out_len - 1];
-if (!is_terminator_uop(last_uop)) {
-// Copy remaining uops from original trace until we find a terminator
-for (int i = ctx->out_len; i < trace_len; i++) {
-ctx->out_buffer[ctx->out_len++] = trace[i];
-if (is_terminator_uop(&trace[i])) {
-break;
-}
-}
-}
+if (uop_buffer_length(&ctx->out_buffer) > 0) {
+assert(is_terminator_uop(uop_buffer_last(&ctx->out_buffer)));
 }

-return ctx->out_len;
+return uop_buffer_length(&ctx->out_buffer);

 error:
 DPRINTF(3, "\n");
@@ -696,14 +696,15 @@ _Py_uop_analyze_and_optimize(
 _PyUOpInstruction *buffer,
 int length,
 int curr_stacklen,
+_PyUOpInstruction *output,
 _PyBloomFilter *dependencies
 )
 {
 OPT_STAT_INC(optimizer_attempts);

 length = optimize_uops(
-tstate, buffer,
-length, curr_stacklen, dependencies);
+tstate, buffer, length, curr_stacklen,
+output, dependencies);

 if (length == 0) {
 return length;
@@ -711,7 +712,7 @@ _Py_uop_analyze_and_optimize(

 assert(length > 0);

-length = remove_unneeded_uops(tstate->jit_tracer_state->out_buffer, length);
+length = remove_unneeded_uops(output, length);
 assert(length > 0);

 OPT_STAT_INC(optimizer_successes);
@@ -194,7 +194,6 @@ dummy_func(void) {
 _Py_BloomFilter_Add(dependencies, type);
 }
 }
-
 }
 }
 }
@@ -798,7 +797,7 @@ dummy_func(void) {
 if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) {
 assert(PyFunction_Check(sym_get_const(ctx, callable)));
 ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)sym_get_const(ctx, callable);
+uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)sym_get_const(ctx, callable);
 }
 sym_set_type(callable, &PyFunction_Type);
 }
@@ -808,7 +807,7 @@ dummy_func(void) {
 PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable);
 assert(PyMethod_Check(method));
 ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)method->im_func;
+uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)method->im_func;
 }
 sym_set_type(callable, &PyMethod_Type);
 }
@@ -1570,7 +1569,7 @@ dummy_func(void) {
 ctx->frame->globals_watched = true;
 }
 if (ctx->frame->globals_checked_version != version && this_instr[-1].opcode == _NOP) {
-REPLACE_OP(&ctx->out_buffer[ctx->out_len - 1], _GUARD_GLOBALS_VERSION, 0, version);
+REPLACE_OP(uop_buffer_last(&ctx->out_buffer), _GUARD_GLOBALS_VERSION, 0, version);
 ctx->frame->globals_checked_version = version;
 }
 if (ctx->frame->globals_checked_version == version) {
Python/optimizer_cases.c.h (generated)
@@ -1557,7 +1557,7 @@
 ctx->frame->globals_watched = true;
 }
 if (ctx->frame->globals_checked_version != version && this_instr[-1].opcode == _NOP) {
-REPLACE_OP(&ctx->out_buffer[ctx->out_len - 1], _GUARD_GLOBALS_VERSION, 0, version);
+REPLACE_OP(uop_buffer_last(&ctx->out_buffer), _GUARD_GLOBALS_VERSION, 0, version);
 ctx->frame->globals_checked_version = version;
 }
 if (ctx->frame->globals_checked_version == version) {
@@ -2861,7 +2861,7 @@
 if (sym_is_const(ctx, callable) && sym_matches_type(callable, &PyFunction_Type)) {
 assert(PyFunction_Check(sym_get_const(ctx, callable)));
 ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)sym_get_const(ctx, callable);
+uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)sym_get_const(ctx, callable);
 }
 sym_set_type(callable, &PyFunction_Type);
 break;
@@ -2879,7 +2879,7 @@
 PyMethodObject *method = (PyMethodObject *)sym_get_const(ctx, callable);
 assert(PyMethod_Check(method));
 ADD_OP(_CHECK_FUNCTION_VERSION_INLINE, 0, func_version);
-ctx->out_buffer[ctx->out_len - 1].operand1 = (uintptr_t)method->im_func;
+uop_buffer_last(&ctx->out_buffer)->operand1 = (uintptr_t)method->im_func;
 }
 sym_set_type(callable, &PyMethod_Type);
 break;
@@ -24,7 +24,6 @@
 #include "pycore_stackref.h" // Py_STACKREF_DEBUG
 #include "pycore_stats.h" // FT_STAT_WORLD_STOP_INC()
 #include "pycore_time.h" // _PyTime_Init()
-#include "pycore_uop.h" // UOP_BUFFER_SIZE
 #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts()