gh-143732: add specialization for FOR_ITER (GH-148745)

This commit is contained in:
Neko Asakura 2026-05-05 00:29:10 +08:00 committed by GitHub
parent 952784af47
commit 9846407eaf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 1569 additions and 1176 deletions

View file

@ -3756,7 +3756,7 @@ dummy_func(
next = item;
}
macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _FOR_ITER;
macro(FOR_ITER) = _SPECIALIZE_FOR_ITER + _RECORD_NOS_TYPE + _FOR_ITER;
op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) {
_PyStackRef item = _PyForIter_VirtualIteratorNext(tstate, frame, iter, &null_or_index);
@ -3771,6 +3771,31 @@ dummy_func(
next = item;
}
// Tier-2 guard on the iterator's type. Takes the exit unless the type of
// `iter` is exactly the type pointer carried in the `expected_type` operand.
// The stack is left unchanged (iter, null_or_index in and out).
tier2 op(_GUARD_TYPE_ITER, (expected_type/4, iter, null_or_index -- iter, null_or_index)) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
// Pointer-identity comparison on the type object, so a subclass does not match.
EXIT_IF(Py_TYPE(iter_o) != (PyTypeObject *)expected_type);
}
// Tier-2 uop that produces the next item by calling the function pointer
// carried in the `iternext_fn` operand directly on `iter`.
//   - non-NULL result: pushed as `next`.
//   - NULL with an exception other than StopIteration: error path.
//   - NULL with StopIteration (cleared here) or with no exception: exit.
tier2 op(_ITER_NEXT_INLINE, (iternext_fn/4, iter, null_or_index -- iter, null_or_index, next)) {
// The operand is cast to a function pointer below; check the sizes agree.
assert(sizeof(iternextfunc) == sizeof(uintptr_t));
// NOTE(review): the reason for `volatile` is not visible in this hunk — confirm
// with the author before removing it.
volatile iternextfunc iternext_v = (iternextfunc)iternext_fn;
PyObject *item = iternext_v(PyStackRef_AsPyObjectBorrow(iter));
if (item == NULL) {
if (_PyErr_Occurred(tstate)) {
if (_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
// StopIteration: call _PyEval_MonitorRaise for it, then clear the exception.
_PyEval_MonitorRaise(tstate, frame, frame->instr_ptr);
_PyErr_Clear(tstate);
}
else {
// Any other exception is propagated through the error path.
ERROR_NO_POP();
}
}
// Unconditional exit: reached after a cleared StopIteration or when the
// call returned NULL without setting an exception.
EXIT_IF(true);
}
STAT_INC(FOR_ITER, hit);
// Wrap the returned object as a stack reference using the "Steal" constructor.
next = PyStackRef_FromPyObjectSteal(item);
}
op(_GUARD_NOS_ITER_VIRTUAL, (iter, null_or_index -- iter, null_or_index)) {
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
EXIT_IF(Py_TYPE(iter_o)->_tp_iteritem == NULL);

View file

@ -14072,6 +14072,152 @@
break;
}
/* _GUARD_TYPE_ITER variant: checked on entry for 0 cached stack values,
   leaves with 2 cached values when the guard passes. */
case _GUARD_TYPE_ITER_r02: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef iter;
/* Nothing is cached, so the iterator is read from the in-memory stack,
   one slot below the top. */
iter = stack_pointer[-2];
PyObject *expected_type = (PyObject *)CURRENT_OPERAND0_64();
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (Py_TYPE(iter_o) != (PyTypeObject *)expected_type) {
/* Type mismatch: count a miss and jump out with no cached values. */
UOP_STAT_INC(uopcode, miss);
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_JUMP_TARGET();
}
/* Guard passed: copy the top two stack items into _tos_cache1/_tos_cache0
   and lower stack_pointer by the two slots they occupied. */
_tos_cache1 = stack_pointer[-1];
_tos_cache0 = iter;
SET_CURRENT_CACHED_VALUES(2);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
/* _GUARD_TYPE_ITER variant: checked on entry for 1 cached stack value,
   leaves with 2 cached values when the guard passes. */
case _GUARD_TYPE_ITER_r12: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef iter;
_PyStackRef _stack_item_0 = _tos_cache0;
/* Only one value is cached; the iterator is read from the top slot of
   the in-memory stack. */
iter = stack_pointer[-1];
PyObject *expected_type = (PyObject *)CURRENT_OPERAND0_64();
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (Py_TYPE(iter_o) != (PyTypeObject *)expected_type) {
/* Type mismatch: count a miss and jump out with the single cached value
   put back as it was on entry. */
UOP_STAT_INC(uopcode, miss);
_tos_cache0 = _stack_item_0;
SET_CURRENT_CACHED_VALUES(1);
JUMP_TO_JUMP_TARGET();
}
/* Guard passed: the previously cached value moves to _tos_cache1, the
   iterator is placed in _tos_cache0, and stack_pointer drops by one. */
_tos_cache1 = _stack_item_0;
_tos_cache0 = iter;
SET_CURRENT_CACHED_VALUES(2);
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
/* _GUARD_TYPE_ITER variant: checked on entry for 2 cached stack values and
   leaves with 2. stack_pointer is not modified on either path. */
case _GUARD_TYPE_ITER_r22: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef iter;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
/* Both inputs are cached; the iterator is the first cached value. */
iter = _stack_item_0;
PyObject *expected_type = (PyObject *)CURRENT_OPERAND0_64();
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (Py_TYPE(iter_o) != (PyTypeObject *)expected_type) {
/* Type mismatch: count a miss, write the same two values back to the
   cache and jump out. */
UOP_STAT_INC(uopcode, miss);
_tos_cache1 = _stack_item_1;
_tos_cache0 = iter;
SET_CURRENT_CACHED_VALUES(2);
JUMP_TO_JUMP_TARGET();
}
/* Guard passed: the cache holds the same two values as on entry. */
_tos_cache1 = _stack_item_1;
_tos_cache0 = iter;
SET_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
/* _GUARD_TYPE_ITER variant: checked on entry for 3 cached stack values and
   leaves with 3. stack_pointer is not modified on either path. */
case _GUARD_TYPE_ITER_r33: {
CHECK_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef iter;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
_PyStackRef _stack_item_2 = _tos_cache2;
/* With three cached values the iterator is the middle one; the value in
   _tos_cache0 is not an input of this uop and is only carried through. */
iter = _stack_item_1;
PyObject *expected_type = (PyObject *)CURRENT_OPERAND0_64();
PyObject *iter_o = PyStackRef_AsPyObjectBorrow(iter);
if (Py_TYPE(iter_o) != (PyTypeObject *)expected_type) {
/* Type mismatch: count a miss, write the same three values back to the
   cache and jump out. */
UOP_STAT_INC(uopcode, miss);
_tos_cache2 = _stack_item_2;
_tos_cache1 = iter;
_tos_cache0 = _stack_item_0;
SET_CURRENT_CACHED_VALUES(3);
JUMP_TO_JUMP_TARGET();
}
/* Guard passed: the cache holds the same three values as on entry. */
_tos_cache2 = _stack_item_2;
_tos_cache1 = iter;
_tos_cache0 = _stack_item_0;
SET_CURRENT_CACHED_VALUES(3);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
/* _ITER_NEXT_INLINE variant: checked on entry for 2 cached stack values
   (iter, null_or_index); leaves with 3 cached values (those two plus `next`)
   when an item is produced. */
case _ITER_NEXT_INLINE_r23: {
CHECK_CURRENT_CACHED_VALUES(2);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef iter;
_PyStackRef next;
_PyStackRef _stack_item_0 = _tos_cache0;
_PyStackRef _stack_item_1 = _tos_cache1;
iter = _stack_item_0;
PyObject *iternext_fn = (PyObject *)CURRENT_OPERAND0_64();
/* The operand is cast to a function pointer below; check the sizes agree. */
assert(sizeof(iternextfunc) == sizeof(uintptr_t));
volatile iternextfunc iternext_v = (iternextfunc)iternext_fn;
/* Write both cached values to the in-memory stack and store the updated
   stack pointer in the frame before making the call. */
stack_pointer[0] = iter;
stack_pointer[1] = _stack_item_1;
stack_pointer += 2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyObject *item = iternext_v(PyStackRef_AsPyObjectBorrow(iter));
stack_pointer = _PyFrame_GetStackPointer(frame);
if (item == NULL) {
if (_PyErr_Occurred(tstate)) {
if (_PyErr_ExceptionMatches(tstate, PyExc_StopIteration)) {
/* StopIteration: call _PyEval_MonitorRaise and clear the exception, with
   the frame's stack pointer saved before and reloaded after. */
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyEval_MonitorRaise(tstate, frame, frame->instr_ptr);
_PyErr_Clear(tstate);
stack_pointer = _PyFrame_GetStackPointer(frame);
}
else {
/* Any other exception: jump to the error handler with no cached values;
   the two items written to memory above are left there. */
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
}
/* Always taken (expansion of EXIT_IF(true)): reload the two inputs into
   the cache, undo the stack_pointer bump and jump out. */
if (true) {
UOP_STAT_INC(uopcode, miss);
_tos_cache1 = _stack_item_1;
_tos_cache0 = iter;
SET_CURRENT_CACHED_VALUES(2);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
JUMP_TO_JUMP_TARGET();
}
}
STAT_INC(FOR_ITER, hit);
next = PyStackRef_FromPyObjectSteal(item);
/* Success: all three values go into the cache and stack_pointer returns to
   its value on entry. */
_tos_cache2 = next;
_tos_cache1 = _stack_item_1;
_tos_cache0 = iter;
SET_CURRENT_CACHED_VALUES(3);
stack_pointer += -2;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _GUARD_NOS_ITER_VIRTUAL_r02: {
CHECK_CURRENT_CACHED_VALUES(0);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());

View file

@ -508,6 +508,7 @@ is_for_iter_test[MAX_UOP_ID + 1] = {
[_GUARD_NOT_EXHAUSTED_LIST] = 1,
[_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
[_FOR_ITER_TIER_TWO] = 1,
[_ITER_NEXT_INLINE] = 1,
};
static const uint16_t

View file

@ -1452,6 +1452,28 @@ dummy_func(void) {
}
}
// Optimizer rule for _FOR_ITER_TIER_TWO. When a type is available for `iter`
// and that type has a tp_iternext slot, emits _ITER_NEXT_INLINE with the slot
// as operand, preceded by a _GUARD_TYPE_ITER when the type is only "probable".
// `next` is always modelled as a new not-null symbol.
op(_FOR_ITER_TIER_TWO, (iter, null_or_index -- iter, null_or_index, next)) {
// `definite` records whether the type came from sym_get_type (true) or from
// the sym_get_probable_type fallback (false).
bool definite = true;
PyTypeObject *type = sym_get_type(iter);
if (type == NULL) {
type = sym_get_probable_type(iter);
definite = false;
}
// PyGen_Type is excluded explicitly.
// NOTE(review): the reason is not visible in this hunk — presumably generators
// are handled by a different specialization; confirm.
if (type != NULL && type != &PyGen_Type && type->tp_iternext != NULL) {
// Register the type with the type watcher and add it to `dependencies`.
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
_Py_BloomFilter_Add(dependencies, type);
if (!definite) {
// The type was only probable: record it on the symbol and emit a runtime
// guard. The guard reuses the `target` of the preceding instruction, which
// is asserted to be _RECORD_NOS_TYPE.
sym_set_type(iter, type);
assert((this_instr - 1)->opcode == _RECORD_NOS_TYPE);
int32_t orig_target = (this_instr - 1)->target;
ADD_OP(_GUARD_TYPE_ITER, 0, (uintptr_t)type);
uop_buffer_last(&ctx->out_buffer)->target = orig_target;
}
// Emit the direct call, passing the type's tp_iternext slot as the operand.
ADD_OP(_ITER_NEXT_INLINE, 0, (uintptr_t)type->tp_iternext);
}
next = sym_new_not_null(ctx);
}
op(_GUARD_ITERATOR, (iterable -- iterable)) {
bool definite = true;
PyTypeObject *tp = sym_get_type(iterable);

View file

@ -3640,6 +3640,40 @@
/* _FOR_ITER is not a viable micro-op for tier 2 */
/* Optimizer case for _FOR_ITER_TIER_TWO: may emit _ITER_NEXT_INLINE (and, for
   a probable-only type, a preceding _GUARD_TYPE_ITER), then pushes a new
   not-null symbol for `next`. */
case _FOR_ITER_TIER_TWO: {
JitOptRef iter;
JitOptRef next;
/* The iterator symbol sits one slot below the top of the symbolic stack. */
iter = stack_pointer[-2];
/* `definite` records whether the type came from sym_get_type (true) or from
   the sym_get_probable_type fallback (false). */
bool definite = true;
PyTypeObject *type = sym_get_type(iter);
if (type == NULL) {
type = sym_get_probable_type(iter);
definite = false;
}
/* PyGen_Type is excluded explicitly; the reason is not visible in this hunk. */
if (type != NULL && type != &PyGen_Type && type->tp_iternext != NULL) {
/* Register the type with the type watcher and add it to `dependencies`. */
PyType_Watch(TYPE_WATCHER_ID, (PyObject *)type);
_Py_BloomFilter_Add(dependencies, type);
if (!definite) {
/* Probable type only: record it on the symbol and emit a runtime guard
   that reuses the `target` of the preceding instruction, which is asserted
   to be _RECORD_NOS_TYPE. */
sym_set_type(iter, type);
assert((this_instr - 1)->opcode == _RECORD_NOS_TYPE);
int32_t orig_target = (this_instr - 1)->target;
ADD_OP(_GUARD_TYPE_ITER, 0, (uintptr_t)type);
uop_buffer_last(&ctx->out_buffer)->target = orig_target;
}
/* Emit the direct call with the type's tp_iternext slot as the operand. */
ADD_OP(_ITER_NEXT_INLINE, 0, (uintptr_t)type->tp_iternext);
}
next = sym_new_not_null(ctx);
/* Push `next` onto the symbolic stack. */
CHECK_STACK_BOUNDS(1);
stack_pointer[0] = next;
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
break;
}
/* Optimizer case for _GUARD_TYPE_ITER: empty body, so the symbolic stack is
   left untouched. */
case _GUARD_TYPE_ITER: {
break;
}
case _ITER_NEXT_INLINE: {
JitOptRef next;
next = sym_new_not_null(ctx);
CHECK_STACK_BOUNDS(1);

View file

@ -103,10 +103,9 @@ void _PyOpcode_RecordFunction_CODE(_PyInterpreterFrame *frame, _PyStackRef *stac
#define _RECORD_NOS_TYPE_INDEX 3
#define _RECORD_3OS_GEN_FUNC_INDEX 4
#define _RECORD_TOS_INDEX 5
#define _RECORD_NOS_GEN_FUNC_INDEX 6
#define _RECORD_CALLABLE_INDEX 7
#define _RECORD_CALLABLE_KW_INDEX 8
#define _RECORD_4OS_INDEX 9
#define _RECORD_CALLABLE_INDEX 6
#define _RECORD_CALLABLE_KW_INDEX 7
#define _RECORD_4OS_INDEX 8
const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
[TO_BOOL_BOOL] = {1, {_RECORD_TOS_TYPE_INDEX}},
@ -156,12 +155,12 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
[GET_ITER] = {1, {_RECORD_TOS_TYPE_INDEX}},
[GET_ITER_SELF] = {1, {_RECORD_TOS_TYPE_INDEX}},
[GET_ITER_VIRTUAL] = {1, {_RECORD_TOS_TYPE_INDEX}},
[FOR_ITER] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER_VIRTUAL] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER_LIST] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER_TUPLE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER_RANGE] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER_GEN] = {1, {_RECORD_NOS_GEN_FUNC_INDEX}},
[FOR_ITER] = {1, {_RECORD_NOS_INDEX}},
[FOR_ITER_VIRTUAL] = {1, {_RECORD_NOS_INDEX}},
[FOR_ITER_LIST] = {1, {_RECORD_NOS_INDEX}},
[FOR_ITER_TUPLE] = {1, {_RECORD_NOS_INDEX}},
[FOR_ITER_RANGE] = {1, {_RECORD_NOS_INDEX}},
[FOR_ITER_GEN] = {1, {_RECORD_NOS_INDEX}},
[LOAD_SPECIAL] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_METHOD_WITH_VALUES] = {1, {_RECORD_TOS_INDEX}},
[LOAD_ATTR_METHOD_NO_DICT] = {1, {_RECORD_TOS_INDEX}},
@ -219,7 +218,8 @@ const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {
[GET_ITER] = {1, 0, {0}},
[GET_ITER_SELF] = {1, 0, {0}},
[GET_ITER_VIRTUAL] = {1, 0, {0}},
[FOR_ITER_GEN] = {1, 0, {0}},
[FOR_ITER] = {1, 1, {0}},
[FOR_ITER_GEN] = {1, 1, {0}},
[LOAD_SPECIAL] = {1, 0, {0}},
[LOAD_ATTR_METHOD_WITH_VALUES] = {1, 1, {0}},
[LOAD_ATTR_METHOD_NO_DICT] = {1, 1, {0}},
@ -245,14 +245,13 @@ const _PyOpcodeRecordSlotMap _PyOpcode_RecordSlotMaps[256] = {
[BINARY_OP] = {2, 2, {1, 0}},
};
const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[10] = {
const _Py_RecordFuncPtr _PyOpcode_RecordFunctions[9] = {
[0] = NULL,
[_RECORD_TOS_TYPE_INDEX] = _PyOpcode_RecordFunction_TOS_TYPE,
[_RECORD_NOS_INDEX] = _PyOpcode_RecordFunction_NOS,
[_RECORD_NOS_TYPE_INDEX] = _PyOpcode_RecordFunction_NOS_TYPE,
[_RECORD_3OS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_3OS_GEN_FUNC,
[_RECORD_TOS_INDEX] = _PyOpcode_RecordFunction_TOS,
[_RECORD_NOS_GEN_FUNC_INDEX] = _PyOpcode_RecordFunction_NOS_GEN_FUNC,
[_RECORD_CALLABLE_INDEX] = _PyOpcode_RecordFunction_CALLABLE,
[_RECORD_CALLABLE_KW_INDEX] = _PyOpcode_RecordFunction_CALLABLE_KW,
[_RECORD_4OS_INDEX] = _PyOpcode_RecordFunction_4OS,