gh-143092: Make CALL_LIST_APPEND and BINARY_OP_INPLACE_ADD_UNICODE normal instructions (GH-143124)

These super instructions need many special cases in the interpreter, specializer, and JIT. It's best we convert them to normal instructions.
This commit is contained in:
Ken Jin 2025-12-25 06:03:00 +08:00 committed by GitHub
parent 594a4631c3
commit cf6758ff9e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 167 additions and 205 deletions

View file

@ -588,7 +588,7 @@ dummy_func(
BINARY_OP_SUBSCR_STR_INT,
BINARY_OP_SUBSCR_DICT,
BINARY_OP_SUBSCR_GETITEM,
// BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode.
BINARY_OP_INPLACE_ADD_UNICODE,
BINARY_OP_EXTEND,
};
@ -762,13 +762,10 @@ dummy_func(
macro(BINARY_OP_ADD_UNICODE) =
_GUARD_TOS_UNICODE + _GUARD_NOS_UNICODE + unused/5 + _BINARY_OP_ADD_UNICODE + _POP_TOP_UNICODE + _POP_TOP_UNICODE;
// This is a subtle one. It's a super-instruction for
// BINARY_OP_ADD_UNICODE followed by STORE_FAST
// where the store goes into the left argument.
// So the inputs are the same as for all BINARY_OP
// specializations, but there is no output.
// At the end we just skip over the STORE_FAST.
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) {
// This is a subtle one. We write NULL to the local that the
// following STORE_FAST targets, and leave the result on the stack
// for that STORE_FAST to store later.
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- res)) {
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
assert(PyUnicode_CheckExact(left_o));
assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(right)));
@ -796,20 +793,16 @@ dummy_func(
* that the string is safe to mutate.
*/
assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
PyUnicode_Append(&temp, right_o);
PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc);
DEAD(right);
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
DEAD(left);
PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
PyUnicode_Append(&temp, right_o);
*target_local = PyStackRef_FromPyObjectSteal(temp);
Py_DECREF(right_o);
ERROR_IF(PyStackRef_IsNull(*target_local));
#if TIER_ONE
// The STORE_FAST is already done. This is done here in tier one,
// and during trace projection in tier two:
assert(next_instr->op.code == STORE_FAST);
SKIP_OVER(1);
#endif
ERROR_IF(temp == NULL);
res = PyStackRef_FromPyObjectSteal(temp);
*target_local = PyStackRef_NULL;
}
op(_GUARD_BINARY_OP_EXTEND, (descr/4, left, right -- left, right)) {
@ -4330,8 +4323,7 @@ dummy_func(
DEOPT_IF(callable_o != interp->callable_cache.list_append);
}
// This is secretly a super-instruction
op(_CALL_LIST_APPEND, (callable, self, arg -- c, s)) {
op(_CALL_LIST_APPEND, (callable, self, arg -- none, c, s)) {
assert(oparg == 1);
PyObject *self_o = PyStackRef_AsPyObjectBorrow(self);
@ -4344,13 +4336,9 @@ dummy_func(
}
c = callable;
s = self;
INPUTS_DEAD();
#if TIER_ONE
// Skip the following POP_TOP. This is done here in tier one, and
// during trace projection in tier two:
assert(next_instr->op.code == POP_TOP);
SKIP_OVER(1);
#endif
DEAD(callable);
DEAD(self);
none = PyStackRef_None;
}
op(_CALL_METHOD_DESCRIPTOR_O, (callable, self_or_null, args[oparg] -- res)) {
@ -5598,15 +5586,9 @@ dummy_func(
// Super instructions. Instruction deopted. There's a mismatch in what the stack expects
// in the optimizer. So we have to reflect in the trace correctly.
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
opcode == POP_TOP) ||
(_tstate->jit_tracer_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
opcode == STORE_FAST)) {
_tstate->jit_tracer_state.prev_state.instr_is_super = true;
}
else {
_tstate->jit_tracer_state.prev_state.instr = next_instr;
}
// JIT should have disabled super instructions, as we can
// do these optimizations ourselves in the JIT.
_tstate->jit_tracer_state.prev_state.instr = next_instr;
PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
if (_tstate->jit_tracer_state.prev_state.instr_code != (PyCodeObject *)prev_code) {
Py_SETREF(_tstate->jit_tracer_state.prev_state.instr_code, (PyCodeObject*)Py_NewRef((prev_code)));

View file

@ -4291,11 +4291,12 @@
break;
}
case _BINARY_OP_INPLACE_ADD_UNICODE_r20: {
case _BINARY_OP_INPLACE_ADD_UNICODE_r21: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef right;
_PyStackRef left;
_PyStackRef res;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
right = _stack_item_1;
@ -4321,29 +4322,31 @@
}
STAT_INC(BINARY_OP, hit);
assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
stack_pointer[0] = left;
stack_pointer[1] = right;
stack_pointer += 2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyUnicode_Append(&temp, right_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
*target_local = PyStackRef_FromPyObjectSteal(temp);
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(right_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (PyStackRef_IsNull(*target_local)) {
PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
if (temp == NULL) {
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
#if TIER_ONE
assert(next_instr->op.code == STORE_FAST);
SKIP_OVER(1);
#endif
_tos_cache0 = PyStackRef_ZERO_BITS;
res = PyStackRef_FromPyObjectSteal(temp);
*target_local = PyStackRef_NULL;
_tos_cache0 = res;
_tos_cache1 = PyStackRef_ZERO_BITS;
_tos_cache2 = PyStackRef_ZERO_BITS;
SET_CURRENT_CACHED_VALUES(0);
SET_CURRENT_CACHED_VALUES(1);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
@ -13902,12 +13905,13 @@
break;
}
case _CALL_LIST_APPEND_r02: {
case _CALL_LIST_APPEND_r03: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef arg;
_PyStackRef self;
_PyStackRef callable;
_PyStackRef none;
_PyStackRef c;
_PyStackRef s;
oparg = CURRENT_OPARG();
@ -13930,26 +13934,24 @@
}
c = callable;
s = self;
#if TIER_ONE
assert(next_instr->op.code == POP_TOP);
SKIP_OVER(1);
#endif
_tos_cache1 = s;
_tos_cache0 = c;
SET_CURRENT_CACHED_VALUES(2);
none = PyStackRef_None;
_tos_cache2 = s;
_tos_cache1 = c;
_tos_cache0 = none;
SET_CURRENT_CACHED_VALUES(3);
stack_pointer += -3;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _CALL_LIST_APPEND_r12: {
case _CALL_LIST_APPEND_r13: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef arg;
_PyStackRef self;
_PyStackRef callable;
_PyStackRef none;
_PyStackRef c;
_PyStackRef s;
_PyStackRef _stack_item_0 = _tos_cache0;
@ -13976,26 +13978,24 @@
}
c = callable;
s = self;
#if TIER_ONE
assert(next_instr->op.code == POP_TOP);
SKIP_OVER(1);
#endif
_tos_cache1 = s;
_tos_cache0 = c;
SET_CURRENT_CACHED_VALUES(2);
none = PyStackRef_None;
_tos_cache2 = s;
_tos_cache1 = c;
_tos_cache0 = none;
SET_CURRENT_CACHED_VALUES(3);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _CALL_LIST_APPEND_r22: {
case _CALL_LIST_APPEND_r23: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef arg;
_PyStackRef self;
_PyStackRef callable;
_PyStackRef none;
_PyStackRef c;
_PyStackRef s;
_PyStackRef _stack_item_0 = _tos_cache0;
@ -14025,26 +14025,24 @@
}
c = callable;
s = self;
#if TIER_ONE
assert(next_instr->op.code == POP_TOP);
SKIP_OVER(1);
#endif
_tos_cache1 = s;
_tos_cache0 = c;
SET_CURRENT_CACHED_VALUES(2);
none = PyStackRef_None;
_tos_cache2 = s;
_tos_cache1 = c;
_tos_cache0 = none;
SET_CURRENT_CACHED_VALUES(3);
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _CALL_LIST_APPEND_r32: {
case _CALL_LIST_APPEND_r33: {
CHECK_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef arg;
_PyStackRef self;
_PyStackRef callable;
_PyStackRef none;
_PyStackRef c;
_PyStackRef s;
_PyStackRef _stack_item_0 = _tos_cache0;
@ -14077,14 +14075,11 @@
}
c = callable;
s = self;
#if TIER_ONE
assert(next_instr->op.code == POP_TOP);
SKIP_OVER(1);
#endif
_tos_cache1 = s;
_tos_cache0 = c;
SET_CURRENT_CACHED_VALUES(2);
none = PyStackRef_None;
_tos_cache2 = s;
_tos_cache1 = c;
_tos_cache0 = none;
SET_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}

View file

@ -382,6 +382,7 @@
_PyStackRef nos;
_PyStackRef left;
_PyStackRef right;
_PyStackRef res;
// _GUARD_TOS_UNICODE
{
value = stack_pointer[-1];
@ -426,27 +427,22 @@
}
STAT_INC(BINARY_OP, hit);
assert(Py_REFCNT(left_o) >= 2 || !PyStackRef_IsHeapSafe(left));
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local);
PyObject *right_o = PyStackRef_AsPyObjectSteal(right);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyUnicode_Append(&temp, right_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
*target_local = PyStackRef_FromPyObjectSteal(temp);
_PyFrame_SetStackPointer(frame, stack_pointer);
Py_DECREF(right_o);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (PyStackRef_IsNull(*target_local)) {
JUMP_TO_LABEL(error);
PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc);
PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc);
if (temp == NULL) {
JUMP_TO_LABEL(pop_2_error);
}
#if TIER_ONE
assert(next_instr->op.code == STORE_FAST);
SKIP_OVER(1);
#endif
res = PyStackRef_FromPyObjectSteal(temp);
*target_local = PyStackRef_NULL;
}
stack_pointer[-2] = res;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
DISPATCH();
}
@ -3250,6 +3246,7 @@
_PyStackRef nos;
_PyStackRef self;
_PyStackRef arg;
_PyStackRef none;
_PyStackRef c;
_PyStackRef s;
_PyStackRef value;
@ -3304,17 +3301,14 @@
}
c = callable;
s = self;
#if TIER_ONE
assert(next_instr->op.code == POP_TOP);
SKIP_OVER(1);
#endif
none = PyStackRef_None;
}
// _POP_TOP
{
value = s;
stack_pointer[-3] = c;
stack_pointer += -2;
stack_pointer[-3] = none;
stack_pointer[-2] = c;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_XCLOSE(value);
@ -11425,15 +11419,7 @@
DISPATCH();
}
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND &&
opcode == POP_TOP) ||
(_tstate->jit_tracer_state.prev_state.instr->op.code == BINARY_OP_INPLACE_ADD_UNICODE &&
opcode == STORE_FAST)) {
_tstate->jit_tracer_state.prev_state.instr_is_super = true;
}
else {
_tstate->jit_tracer_state.prev_state.instr = next_instr;
}
_tstate->jit_tracer_state.prev_state.instr = next_instr;
PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
if (_tstate->jit_tracer_state.prev_state.instr_code != (PyCodeObject *)prev_code) {
_PyFrame_SetStackPointer(frame, stack_pointer);

View file

@ -703,12 +703,6 @@ _PyJit_translate_single_bytecode_to_trace(
}
#endif
// Skip over super instructions.
if (_tstate->jit_tracer_state.prev_state.instr_is_super) {
_tstate->jit_tracer_state.prev_state.instr_is_super = false;
return 1;
}
if (opcode == ENTER_EXECUTOR) {
goto full;
}
@ -1077,7 +1071,6 @@ _PyJit_TryInitializeTracing(
_tstate->jit_tracer_state.prev_state.instr_frame = frame;
_tstate->jit_tracer_state.prev_state.instr_oparg = oparg;
_tstate->jit_tracer_state.prev_state.instr_stacklevel = curr_stackdepth;
_tstate->jit_tracer_state.prev_state.instr_is_super = false;
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || (exit != NULL));
_tstate->jit_tracer_state.initial_state.jump_backward_instr = curr_instr;

View file

@ -299,14 +299,12 @@ dummy_func(void) {
}
op(_BINARY_OP_ADD_UNICODE, (left, right -- res, l, r)) {
REPLACE_OPCODE_IF_EVALUATES_PURE(left, right);
res = sym_new_type(ctx, &PyUnicode_Type);
l = left;
r = right;
}
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) {
JitOptRef res;
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- res)) {
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
@ -320,8 +318,7 @@ dummy_func(void) {
else {
res = sym_new_type(ctx, &PyUnicode_Type);
}
// _STORE_FAST:
GETLOCAL(this_instr->operand0) = res;
GETLOCAL(this_instr->operand0) = sym_new_null(ctx);
}
op(_BINARY_OP_SUBSCR_INIT_CALL, (container, sub, getitem -- new_frame)) {
@ -1043,10 +1040,11 @@ dummy_func(void) {
sym_set_const(flag, Py_True);
}
op(_CALL_LIST_APPEND, (callable, self, arg -- c, s)) {
op(_CALL_LIST_APPEND, (callable, self, arg -- none, c, s)) {
(void)(arg);
c = callable;
s = self;
none = sym_new_const(ctx, Py_None);
}
op(_GUARD_IS_FALSE_POP, (flag -- )) {

View file

@ -646,42 +646,6 @@
JitOptRef r;
right = stack_pointer[-1];
left = stack_pointer[-2];
if (
sym_is_safe_const(ctx, left) &&
sym_is_safe_const(ctx, right)
) {
JitOptRef left_sym = left;
JitOptRef right_sym = right;
_PyStackRef left = sym_get_const_as_stackref(ctx, left_sym);
_PyStackRef right = sym_get_const_as_stackref(ctx, right_sym);
_PyStackRef res_stackref;
_PyStackRef l_stackref;
_PyStackRef r_stackref;
/* Start of uop copied from bytecodes for constant evaluation */
PyObject *left_o = PyStackRef_AsPyObjectBorrow(left);
PyObject *right_o = PyStackRef_AsPyObjectBorrow(right);
assert(PyUnicode_CheckExact(left_o));
assert(PyUnicode_CheckExact(right_o));
STAT_INC(BINARY_OP, hit);
PyObject *res_o = PyUnicode_Concat(left_o, right_o);
res_stackref = PyStackRef_FromPyObjectSteal(res_o);
if (PyStackRef_IsNull(res)) {
JUMP_TO_LABEL(error);
}
l_stackref = left;
r_stackref = right;
/* End of uop copied from bytecodes for constant evaluation */
res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref));
l = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(l_stackref));
r = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(r_stackref));
CHECK_STACK_BOUNDS(1);
stack_pointer[-2] = res;
stack_pointer[-1] = l;
stack_pointer[0] = r;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
res = sym_new_type(ctx, &PyUnicode_Type);
l = left;
r = right;
@ -697,9 +661,9 @@
case _BINARY_OP_INPLACE_ADD_UNICODE: {
JitOptRef right;
JitOptRef left;
JitOptRef res;
right = stack_pointer[-1];
left = stack_pointer[-2];
JitOptRef res;
if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) {
assert(PyUnicode_CheckExact(sym_get_const(ctx, left)));
assert(PyUnicode_CheckExact(sym_get_const(ctx, right)));
@ -708,15 +672,18 @@
goto error;
}
res = sym_new_const(ctx, temp);
CHECK_STACK_BOUNDS(-1);
stack_pointer[-2] = res;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
Py_DECREF(temp);
}
else {
res = sym_new_type(ctx, &PyUnicode_Type);
stack_pointer += -1;
}
GETLOCAL(this_instr->operand0) = res;
CHECK_STACK_BOUNDS(-2);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
GETLOCAL(this_instr->operand0) = sym_new_null(ctx);
stack_pointer[-1] = res;
break;
}
@ -2958,6 +2925,7 @@
JitOptRef arg;
JitOptRef self;
JitOptRef callable;
JitOptRef none;
JitOptRef c;
JitOptRef s;
arg = stack_pointer[-1];
@ -2966,11 +2934,10 @@
(void)(arg);
c = callable;
s = self;
CHECK_STACK_BOUNDS(-1);
stack_pointer[-3] = c;
stack_pointer[-2] = s;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
none = sym_new_const(ctx, Py_None);
stack_pointer[-3] = none;
stack_pointer[-2] = c;
stack_pointer[-1] = s;
break;
}

View file

@ -1623,10 +1623,8 @@ specialize_method_descriptor(PyMethodDescrObject *descr, PyObject *self_or_null,
}
PyInterpreterState *interp = _PyInterpreterState_GET();
PyObject *list_append = interp->callable_cache.list_append;
_Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_CALL + 1];
bool pop = (next.op.code == POP_TOP);
int oparg = instr->op.arg;
if ((PyObject *)descr == list_append && oparg == 1 && pop) {
if ((PyObject *)descr == list_append && oparg == 1) {
assert(self_or_null != NULL);
if (PyList_CheckExact(self_or_null)) {
specialize(instr, CALL_LIST_APPEND);