mirror of
https://github.com/python/cpython.git
synced 2026-01-30 03:02:30 +00:00
GH-139109: Partial reworking of JIT data structures (GH-144105)
* Halve size of buffers by reusing combined trace + optimizer buffers for TOS caching
* Add simple buffer struct for more maintainable handling of buffers
* Decouple JIT structs from thread state struct
* Ensure terminator is added to trace, when optimizer gives up
This commit is contained in:
parent
fb690c38ca
commit
d77aaa7311
10 changed files with 228 additions and 215 deletions
|
|
@ -203,14 +203,14 @@ static inline void
|
|||
add_op(JitOptContext *ctx, _PyUOpInstruction *this_instr,
|
||||
uint16_t opcode, uint16_t oparg, uintptr_t operand0)
|
||||
{
|
||||
_PyUOpInstruction *out = &ctx->out_buffer[ctx->out_len];
|
||||
_PyUOpInstruction *out = ctx->out_buffer.next;
|
||||
out->opcode = (opcode);
|
||||
out->format = this_instr->format;
|
||||
out->oparg = (oparg);
|
||||
out->target = this_instr->target;
|
||||
out->operand0 = (operand0);
|
||||
out->operand1 = this_instr->operand1;
|
||||
ctx->out_len++;
|
||||
ctx->out_buffer.next++;
|
||||
}
|
||||
|
||||
/* Shortened forms for convenience, used in optimizer_bytecodes.c */
|
||||
|
|
@ -430,6 +430,7 @@ optimize_uops(
|
|||
_PyUOpInstruction *trace,
|
||||
int trace_len,
|
||||
int curr_stacklen,
|
||||
_PyUOpInstruction *output,
|
||||
_PyBloomFilter *dependencies
|
||||
)
|
||||
{
|
||||
|
|
@ -440,7 +441,7 @@ optimize_uops(
|
|||
JitOptContext *ctx = &tstate->jit_tracer_state->opt_context;
|
||||
uint32_t opcode = UINT16_MAX;
|
||||
|
||||
ctx->out_buffer = tstate->jit_tracer_state->out_buffer;
|
||||
uop_buffer_init(&ctx->out_buffer, output, UOP_MAX_TRACE_LENGTH);
|
||||
|
||||
// Make sure that watchers are set up
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
|
|
@ -458,14 +459,20 @@ optimize_uops(
|
|||
ctx->curr_frame_depth++;
|
||||
ctx->frame = frame;
|
||||
|
||||
ctx->out_len = 0;
|
||||
|
||||
_PyUOpInstruction *this_instr = NULL;
|
||||
JitOptRef *stack_pointer = ctx->frame->stack_pointer;
|
||||
|
||||
for (int i = 0; !ctx->done; i++) {
|
||||
assert(i < trace_len);
|
||||
for (int i = 0; i < trace_len; i++) {
|
||||
this_instr = &trace[i];
|
||||
if (ctx->done) {
|
||||
// Don't do any more optimization, but
|
||||
// we still need to reach a terminator for correctness.
|
||||
*(ctx->out_buffer.next++) = *this_instr;
|
||||
if (is_terminator_uop(this_instr)) {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
int oparg = this_instr->oparg;
|
||||
opcode = this_instr->opcode;
|
||||
|
|
@ -485,6 +492,8 @@ optimize_uops(
|
|||
}
|
||||
#endif
|
||||
|
||||
_PyUOpInstruction *out_ptr = ctx->out_buffer.next;
|
||||
|
||||
switch (opcode) {
|
||||
|
||||
#include "optimizer_cases.c.h"
|
||||
|
|
@ -494,8 +503,8 @@ optimize_uops(
|
|||
Py_UNREACHABLE();
|
||||
}
|
||||
// If no ADD_OP was called during this iteration, copy the original instruction
|
||||
if (ctx->out_len == i) {
|
||||
ctx->out_buffer[ctx->out_len++] = *this_instr;
|
||||
if (ctx->out_buffer.next == out_ptr) {
|
||||
*(ctx->out_buffer.next++) = *this_instr;
|
||||
}
|
||||
assert(ctx->frame != NULL);
|
||||
if (!CURRENT_FRAME_IS_INIT_SHIM()) {
|
||||
|
|
@ -526,20 +535,11 @@ optimize_uops(
|
|||
* would be no benefit in retrying later */
|
||||
_Py_uop_abstractcontext_fini(ctx);
|
||||
// Check that the trace ends with a proper terminator
|
||||
if (ctx->out_len > 0) {
|
||||
_PyUOpInstruction *last_uop = &ctx->out_buffer[ctx->out_len - 1];
|
||||
if (!is_terminator_uop(last_uop)) {
|
||||
// Copy remaining uops from original trace until we find a terminator
|
||||
for (int i = ctx->out_len; i < trace_len; i++) {
|
||||
ctx->out_buffer[ctx->out_len++] = trace[i];
|
||||
if (is_terminator_uop(&trace[i])) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (uop_buffer_length(&ctx->out_buffer) > 0) {
|
||||
assert(is_terminator_uop(uop_buffer_last(&ctx->out_buffer)));
|
||||
}
|
||||
|
||||
return ctx->out_len;
|
||||
return uop_buffer_length(&ctx->out_buffer);
|
||||
|
||||
error:
|
||||
DPRINTF(3, "\n");
|
||||
|
|
@ -696,14 +696,15 @@ _Py_uop_analyze_and_optimize(
|
|||
_PyUOpInstruction *buffer,
|
||||
int length,
|
||||
int curr_stacklen,
|
||||
_PyUOpInstruction *output,
|
||||
_PyBloomFilter *dependencies
|
||||
)
|
||||
{
|
||||
OPT_STAT_INC(optimizer_attempts);
|
||||
|
||||
length = optimize_uops(
|
||||
tstate, buffer,
|
||||
length, curr_stacklen, dependencies);
|
||||
tstate, buffer, length, curr_stacklen,
|
||||
output, dependencies);
|
||||
|
||||
if (length == 0) {
|
||||
return length;
|
||||
|
|
@ -711,7 +712,7 @@ _Py_uop_analyze_and_optimize(
|
|||
|
||||
assert(length > 0);
|
||||
|
||||
length = remove_unneeded_uops(tstate->jit_tracer_state->out_buffer, length);
|
||||
length = remove_unneeded_uops(output, length);
|
||||
assert(length > 0);
|
||||
|
||||
OPT_STAT_INC(optimizer_successes);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue