gh-148211: decompose [_POP_TWO/_INSERT_2]_LOAD_CONST_INLINE_BORROW in JIT (GH-148357)

This commit is contained in:
Neko Asakura 2026-04-11 06:27:51 -04:00 committed by GitHub
parent 639f218f9c
commit 72006a71b2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 1151 additions and 1418 deletions

View file

@ -5918,11 +5918,6 @@ dummy_func(
value = PyStackRef_FromPyObjectNew(ptr);
}
// Tier 2 uop with stack effect (pop -- value): consumes one stack item and
// produces one value built from the operand `ptr`.
tier2 pure op (_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) {
// The consumed item is released before the result is created.
PyStackRef_CLOSE(pop);
// Uses the ...New constructor, unlike the *_BORROW sibling uops which use
// PyStackRef_FromPyObjectBorrow. NOTE(review): presumably this takes a new
// strong reference to ptr -- confirm against the stackref API.
value = PyStackRef_FromPyObjectNew(ptr);
}
// Tier 2 uop with stack effect ( -- value): consumes no stack input and
// pushes one value built from the operand `ptr`.
tier2 pure op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) {
// NOTE(review): the Borrow constructor presumably does not take its own
// reference to ptr -- confirm against the stackref API.
value = PyStackRef_FromPyObjectBorrow(ptr);
}
@ -5948,24 +5943,6 @@ dummy_func(
PyStackRef_CLOSE(callable);
}
// Tier 2 uop with stack effect (pop -- value): releases one stack item and
// replaces it with a value built from the operand `ptr` via the Borrow
// constructor.
tier2 op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) {
// Release the consumed item first, then create the result.
PyStackRef_CLOSE(pop);
value = PyStackRef_FromPyObjectBorrow(ptr);
}
// Tier 2 uop with stack effect (pop1, pop2 -- value): releases two stack
// items and produces a single value built from the operand `ptr`, so the
// stack shrinks by one.
tier2 op(_POP_TWO_LOAD_CONST_INLINE_BORROW, (ptr/4, pop1, pop2 -- value)) {
// Inputs are released in reverse declaration order: pop2 first, then pop1.
PyStackRef_CLOSE(pop2);
PyStackRef_CLOSE(pop1);
value = PyStackRef_FromPyObjectBorrow(ptr);
}
// Tier 2 uop with stack effect (left, right -- res, l, r): keeps both inputs
// and inserts a value built from the operand `ptr` underneath them, so the
// stack grows by one.
tier2 op(_INSERT_2_LOAD_CONST_INLINE_BORROW, (ptr/4, left, right -- res, l, r)) {
res = PyStackRef_FromPyObjectBorrow(ptr);
// The inputs are passed through unchanged; nothing is closed here.
l = left;
r = right;
// NOTE(review): INPUTS_DEAD() presumably tells the code generator that
// left/right have been moved into the outputs -- confirm in the cases
// generator documentation.
INPUTS_DEAD();
}
tier2 op(_SHUFFLE_3_LOAD_CONST_INLINE_BORROW, (ptr/4, callable, null, arg -- res, a, c)) {
res = PyStackRef_FromPyObjectBorrow(ptr);
a = arg;

View file

@ -21234,26 +21234,6 @@
break;
}
// Executor case for _POP_TOP_LOAD_CONST_INLINE with one cached stack item on
// entry and one on exit (see CHECK/SET_CURRENT_CACHED_VALUES(1) below).
case _POP_TOP_LOAD_CONST_INLINE_r11: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef pop;
_PyStackRef value;
// The item to pop comes from the top-of-stack cache variable, not from
// stack memory.
_PyStackRef _stack_item_0 = _tos_cache0;
pop = _stack_item_0;
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64();
// The stack pointer is stored to the frame before the close and reloaded
// afterwards. NOTE(review): presumably because PyStackRef_CLOSE may run
// arbitrary code -- confirm.
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(pop);
stack_pointer = _PyFrame_GetStackPointer(frame);
value = PyStackRef_FromPyObjectNew(ptr);
// The result takes the first cache slot; the other two slots are cleared.
_tos_cache0 = value;
_tos_cache1 = PyStackRef_ZERO_BITS;
_tos_cache2 = PyStackRef_ZERO_BITS;
SET_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _LOAD_CONST_INLINE_BORROW_r01: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
@ -21390,130 +21370,6 @@
break;
}
// Executor case for _POP_TOP_LOAD_CONST_INLINE_BORROW with one cached stack
// item on entry and one on exit. Same shape as the non-borrow r11 case,
// except the result is built with PyStackRef_FromPyObjectBorrow.
case _POP_TOP_LOAD_CONST_INLINE_BORROW_r11: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef pop;
_PyStackRef value;
// The item to pop is read from the top-of-stack cache variable.
_PyStackRef _stack_item_0 = _tos_cache0;
pop = _stack_item_0;
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64();
// Stack pointer is saved to the frame around the close and reloaded after.
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(pop);
stack_pointer = _PyFrame_GetStackPointer(frame);
value = PyStackRef_FromPyObjectBorrow(ptr);
// The result takes the first cache slot; the other two slots are cleared.
_tos_cache0 = value;
_tos_cache1 = PyStackRef_ZERO_BITS;
_tos_cache2 = PyStackRef_ZERO_BITS;
SET_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// Executor case for _POP_TWO_LOAD_CONST_INLINE_BORROW with two cached stack
// items on entry and one on exit.
case _POP_TWO_LOAD_CONST_INLINE_BORROW_r21: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef pop2;
_PyStackRef pop1;
_PyStackRef value;
// Both inputs come from the top-of-stack cache: slot 1 is pop2, slot 0 is
// pop1.
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
pop2 = _stack_item_1;
pop1 = _stack_item_0;
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64();
// pop1 is written to stack memory and the pointer bumped before pop2 is
// closed, so pop1 sits on the in-memory stack while that close runs.
// NOTE(review): presumably so the still-live item is visible if the close
// runs arbitrary code -- confirm.
stack_pointer[0] = pop1;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(pop2);
stack_pointer = _PyFrame_GetStackPointer(frame);
// pop1 is taken back off the in-memory stack before it is closed in turn.
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(pop1);
stack_pointer = _PyFrame_GetStackPointer(frame);
value = PyStackRef_FromPyObjectBorrow(ptr);
// Only one cached value remains on exit; unused slots are cleared.
_tos_cache0 = value;
_tos_cache1 = PyStackRef_ZERO_BITS;
_tos_cache2 = PyStackRef_ZERO_BITS;
SET_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// Executor case for _INSERT_2_LOAD_CONST_INLINE_BORROW with no cached stack
// items on entry and three on exit.
case _INSERT_2_LOAD_CONST_INLINE_BORROW_r03: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
// With nothing cached, both inputs are read from stack memory.
right = stack_pointer[-1];
left = stack_pointer[-2];
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64();
res = PyStackRef_FromPyObjectBorrow(ptr);
l = left;
r = right;
// All three outputs go into the cache: res lowest, then l, then r on top.
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
// The two inputs now live in the cache, so the in-memory stack shrinks by 2.
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// Executor case for _INSERT_2_LOAD_CONST_INLINE_BORROW with one cached stack
// item on entry and three on exit.
case _INSERT_2_LOAD_CONST_INLINE_BORROW_r13: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
// The top input (right) comes from the cache; the one below it (left) is
// read from stack memory.
_PyStackRef _stack_item_0 = _tos_cache0;
right = _stack_item_0;
left = stack_pointer[-1];
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64();
res = PyStackRef_FromPyObjectBorrow(ptr);
l = left;
r = right;
// All three outputs go into the cache: res lowest, then l, then r on top.
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
// Only left was in memory, so the in-memory stack shrinks by 1.
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
// Executor case for _INSERT_2_LOAD_CONST_INLINE_BORROW with two cached stack
// items on entry and three on exit.
case _INSERT_2_LOAD_CONST_INLINE_BORROW_r23: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef l;
_PyStackRef r;
// Both inputs are already cached: slot 1 is right (top), slot 0 is left.
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
right = _stack_item_1;
left = _stack_item_0;
PyObject *ptr = (PyObject *)CURRENT_OPERAND0_64();
res = PyStackRef_FromPyObjectBorrow(ptr);
l = left;
r = right;
// The inputs shift up one cache slot each to make room for res below them.
// stack_pointer is not modified in this variant.
_tos_cache2 = r;
_tos_cache1 = l;
_tos_cache0 = res;
SET_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _SHUFFLE_3_LOAD_CONST_INLINE_BORROW_r03: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());

View file

@ -677,9 +677,6 @@ const uint16_t op_without_push[MAX_UOP_ID + 1] = {
[_LOAD_FAST] = _NOP,
[_LOAD_FAST_BORROW] = _NOP,
[_LOAD_SMALL_INT] = _NOP,
[_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP,
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP,
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO,
};
const bool op_skip[MAX_UOP_ID + 1] = {
@ -695,10 +692,7 @@ const uint16_t op_without_pop[MAX_UOP_ID + 1] = {
[_POP_TOP_INT] = _NOP,
[_POP_TOP_FLOAT] = _NOP,
[_POP_TOP_UNICODE] = _NOP,
[_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE,
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW,
[_POP_TWO] = _POP_TOP,
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW,
[_POP_CALL_TWO] = _POP_CALL_ONE,
[_POP_CALL_ONE] = _POP_CALL,
};
@ -739,9 +733,9 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
default:
{
// Cancel out pushes and pops, repeatedly. So:
// _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP
// _LOAD_FAST + _POP_TWO + _LOAD_CONST_INLINE_BORROW + _POP_TOP
// ...becomes:
// _NOP + _POP_TOP + _NOP
// _NOP + _POP_TOP + _NOP + _NOP
while (op_without_pop[opcode] || op_without_pop_null[opcode]) {
_PyUOpInstruction *last = &buffer[pc - 1];
while (op_skip[last->opcode]) {

View file

@ -645,8 +645,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -715,8 +717,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -785,8 +789,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -1510,8 +1516,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -2690,8 +2698,9 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _POP_TWO + _LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_POP_TWO, 0, 0);
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
}
}
CHECK_STACK_BOUNDS(-1);
@ -2761,8 +2770,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -2834,8 +2845,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -2896,8 +2909,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -2976,8 +2991,10 @@
if (sym_is_const(ctx, b)) {
PyObject *result = sym_get_const(ctx, b);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -3083,8 +3100,10 @@
if (sym_is_const(ctx, b)) {
PyObject *result = sym_get_const(ctx, b);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -4694,8 +4713,10 @@
if (sym_is_const(ctx, res)) {
PyObject *result = sym_get_const(ctx, res);
if (_Py_IsImmortal(result)) {
// Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result
ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
// Replace with _LOAD_CONST_INLINE_BORROW + _SWAP + _SWAP since we have two inputs and an immortal result
ADD_OP(_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result);
ADD_OP(_SWAP, 3, 0);
ADD_OP(_SWAP, 2, 0);
}
}
CHECK_STACK_BOUNDS(1);
@ -4926,13 +4947,6 @@
break;
}
// Optimizer case for _POP_TOP_LOAD_CONST_INLINE: overwrites the top abstract
// stack slot with a symbol from sym_new_not_null(); stack depth is unchanged.
case _POP_TOP_LOAD_CONST_INLINE: {
JitOptRef value;
// NOTE(review): the operand constant is not consulted here, so the result
// is modelled only by whatever sym_new_not_null() conveys.
value = sym_new_not_null(ctx);
stack_pointer[-1] = value;
break;
}
case _LOAD_CONST_INLINE_BORROW: {
JitOptRef value;
PyObject *ptr = (PyObject *)this_instr->operand0;
@ -4965,39 +4979,6 @@
break;
}
// Optimizer case for _POP_TOP_LOAD_CONST_INLINE_BORROW: overwrites the top
// abstract stack slot with a symbol from sym_new_not_null(); stack depth is
// unchanged.
case _POP_TOP_LOAD_CONST_INLINE_BORROW: {
JitOptRef value;
// The operand constant is not read in this case.
value = sym_new_not_null(ctx);
stack_pointer[-1] = value;
break;
}
// Optimizer case for _POP_TWO_LOAD_CONST_INLINE_BORROW: two abstract stack
// slots are replaced by one symbol from sym_new_not_null().
case _POP_TWO_LOAD_CONST_INLINE_BORROW: {
JitOptRef value;
value = sym_new_not_null(ctx);
CHECK_STACK_BOUNDS(-1);
// The result lands in the lower of the two consumed slots, then the stack
// shrinks by one so that slot becomes the new top.
stack_pointer[-2] = value;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
// Optimizer case for _INSERT_2_LOAD_CONST_INLINE_BORROW: the two top abstract
// stack slots are replaced by three symbols, growing the stack by one.
case _INSERT_2_LOAD_CONST_INLINE_BORROW: {
JitOptRef res;
JitOptRef l;
JitOptRef r;
// NOTE(review): l and r are created with sym_new_not_null() rather than
// copied from the existing input slots, so anything previously known about
// the inputs does not appear to be carried over -- confirm this is intended.
res = sym_new_not_null(ctx);
l = sym_new_not_null(ctx);
r = sym_new_not_null(ctx);
CHECK_STACK_BOUNDS(1);
// Output layout from bottom to top: res, l, r.
stack_pointer[-2] = res;
stack_pointer[-1] = l;
stack_pointer[0] = r;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
case _SHUFFLE_3_LOAD_CONST_INLINE_BORROW: {
JitOptRef res;
JitOptRef a;