gh-131798: optimize LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN in the JIT (#148555)

This commit is contained in:
Kumar Aditya 2026-04-14 21:00:32 +05:30 committed by GitHub
parent c88c27b0c1
commit 1aa7e7ee6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 1353 additions and 1168 deletions

View file

@ -2942,35 +2942,34 @@ dummy_func(
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
inst(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, (unused/1, type_version/2, func_version/2, getattribute/4, owner -- unused)) {
PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
op(_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME, (func_version/2, getattribute/4, owner -- new_frame)) {
assert((oparg & 1) == 0);
DEOPT_IF(IS_PEP523_HOOKED(tstate));
PyTypeObject *cls = Py_TYPE(owner_o);
assert(type_version != 0);
DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(cls->tp_version_tag) != type_version);
assert(Py_IS_TYPE(getattribute, &PyFunction_Type));
PyFunctionObject *f = (PyFunctionObject *)getattribute;
assert(func_version != 0);
DEOPT_IF(f->func_version != func_version);
EXIT_IF(f->func_version != func_version);
PyCodeObject *code = (PyCodeObject *)f->func_code;
assert(code->co_argcount == 2);
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
EXIT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize));
STAT_INC(LOAD_ATTR, hit);
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(
_PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(
tstate, PyStackRef_FromPyObjectNew(f), 2, frame);
new_frame->localsplus[0] = owner;
pushed_frame->localsplus[0] = owner;
DEAD(owner);
// Manipulate stack directly because we exit with DISPATCH_INLINED().
SYNC_SP();
new_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
frame->return_offset = INSTRUCTION_SIZE;
DISPATCH_INLINED(new_frame);
pushed_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
new_frame = PyStackRef_Wrap(pushed_frame);
}
// LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN expressed as a sequence of micro-ops:
// one unused cache entry, then _RECORD_TOS_TYPE, the type-version guard, the
// PEP 523 check, the uop that builds the callee frame, and finally the
// generic _SAVE_RETURN_OFFSET / _PUSH_FRAME pair.
macro(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN) =
unused/1 +
_RECORD_TOS_TYPE +
_GUARD_TYPE_VERSION +
_CHECK_PEP_523 +
_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME +
_SAVE_RETURN_OFFSET +
_PUSH_FRAME;
op(_GUARD_DORV_NO_DICT, (owner -- owner)) {
PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);

View file

@ -11605,7 +11605,48 @@
break;
}
/* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 because it has too many cache entries */
// Tier 2 case for _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME. Takes `owner`
// from the top-of-stack cache and replaces it with a wrapped, newly pushed
// frame for the `getattribute` function. One cached value is checked on
// entry and one is declared on every exit path.
case _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME_r11: {
CHECK_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
_PyStackRef owner;
_PyStackRef new_frame;
_PyStackRef _stack_item_0 = _tos_cache0;
oparg = CURRENT_OPARG();
owner = _stack_item_0;
// Operand 0 carries the expected function version (32 bits); operand 1
// carries the function object itself.
uint32_t func_version = (uint32_t)CURRENT_OPERAND0_32();
PyObject *getattribute = (PyObject *)CURRENT_OPERAND1_64();
assert((oparg & 1) == 0);
assert(Py_IS_TYPE(getattribute, &PyFunction_Type));
PyFunctionObject *f = (PyFunctionObject *)getattribute;
assert(func_version != 0);
// Guard: the function's current version must equal the version in operand 0.
// On failure, count a miss, put `owner` back in the cache slot and leave
// through the jump target.
if (f->func_version != func_version) {
UOP_STAT_INC(uopcode, miss);
_tos_cache0 = owner;
SET_CURRENT_CACHED_VALUES(1);
JUMP_TO_JUMP_TARGET();
}
PyCodeObject *code = (PyCodeObject *)f->func_code;
assert(code->co_argcount == 2);
// Guard: the thread state must have room for the callee's frame. This has
// to hold before the unchecked push below; failure is handled like the
// version guard above.
if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
UOP_STAT_INC(uopcode, miss);
_tos_cache0 = owner;
SET_CURRENT_CACHED_VALUES(1);
JUMP_TO_JUMP_TARGET();
}
STAT_INC(LOAD_ATTR, hit);
// The name is looked up at index oparg >> 1; the low bit of oparg was
// asserted to be clear above.
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1);
// NOTE(review): the literal 2 presumably is the argument count, matching
// the co_argcount assert above -- confirm against _PyFrame_PushUnchecked.
_PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(
tstate, PyStackRef_FromPyObjectNew(f), 2, frame);
// Local 0 receives the `owner` reference as-is; local 1 receives a new
// reference to `name`.
pushed_frame->localsplus[0] = owner;
pushed_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
new_frame = PyStackRef_Wrap(pushed_frame);
// Publish the wrapped frame in cache slot 0 and reset the other two slots.
_tos_cache0 = new_frame;
_tos_cache1 = PyStackRef_ZERO_BITS;
_tos_cache2 = PyStackRef_ZERO_BITS;
SET_CURRENT_CACHED_VALUES(1);
assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE());
break;
}
case _GUARD_DORV_NO_DICT_r01: {
CHECK_CURRENT_CACHED_VALUES(0);

View file

@ -8307,50 +8307,81 @@
INSTRUCTION_STATS(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN);
static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size");
_PyStackRef owner;
_PyStackRef new_frame;
/* Skip 1 cache entry */
owner = stack_pointer[-1];
uint32_t type_version = read_u32(&this_instr[2].cache);
uint32_t func_version = read_u32(&this_instr[4].cache);
PyObject *getattribute = read_obj(&this_instr[6].cache);
PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
assert((oparg & 1) == 0);
if (IS_PEP523_HOOKED(tstate)) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
// _GUARD_TYPE_VERSION
{
owner = stack_pointer[-1];
uint32_t type_version = read_u32(&this_instr[2].cache);
PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
assert(type_version != 0);
if (FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
}
}
PyTypeObject *cls = Py_TYPE(owner_o);
assert(type_version != 0);
if (FT_ATOMIC_LOAD_UINT_RELAXED(cls->tp_version_tag) != type_version) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
// _CHECK_PEP_523
{
if (IS_PEP523_HOOKED(tstate)) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
}
}
assert(Py_IS_TYPE(getattribute, &PyFunction_Type));
PyFunctionObject *f = (PyFunctionObject *)getattribute;
assert(func_version != 0);
if (f->func_version != func_version) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
// _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME
{
uint32_t func_version = read_u32(&this_instr[4].cache);
PyObject *getattribute = read_obj(&this_instr[6].cache);
assert((oparg & 1) == 0);
assert(Py_IS_TYPE(getattribute, &PyFunction_Type));
PyFunctionObject *f = (PyFunctionObject *)getattribute;
assert(func_version != 0);
if (f->func_version != func_version) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
}
PyCodeObject *code = (PyCodeObject *)f->func_code;
assert(code->co_argcount == 2);
if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
}
STAT_INC(LOAD_ATTR, hit);
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1);
_PyInterpreterFrame *pushed_frame = _PyFrame_PushUnchecked(
tstate, PyStackRef_FromPyObjectNew(f), 2, frame);
pushed_frame->localsplus[0] = owner;
pushed_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
new_frame = PyStackRef_Wrap(pushed_frame);
}
PyCodeObject *code = (PyCodeObject *)f->func_code;
assert(code->co_argcount == 2);
if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
UPDATE_MISS_STATS(LOAD_ATTR);
assert(_PyOpcode_Deopt[opcode] == (LOAD_ATTR));
JUMP_TO_PREDICTED(LOAD_ATTR);
// _SAVE_RETURN_OFFSET
{
#if TIER_ONE
frame->return_offset = (uint16_t)(next_instr - this_instr);
#endif
#if TIER_TWO
frame->return_offset = oparg;
#endif
}
STAT_INC(LOAD_ATTR, hit);
PyObject *name = GETITEM(FRAME_CO_NAMES, oparg >> 1);
_PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(
tstate, PyStackRef_FromPyObjectNew(f), 2, frame);
new_frame->localsplus[0] = owner;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
new_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name);
frame->return_offset = 10u ;
DISPATCH_INLINED(new_frame);
// _PUSH_FRAME
{
assert(!IS_PEP523_HOOKED(tstate));
_PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame);
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
assert(temp->previous == frame || temp->previous->previous == frame);
CALL_STAT_INC(inlined_py_calls);
frame = tstate->current_frame = temp;
tstate->py_recursion_remaining--;
LOAD_SP();
LOAD_IP(0);
LLTRACE_RESUME_FRAME();
}
DISPATCH();
}
TARGET(LOAD_ATTR_INSTANCE_VALUE) {

View file

@ -1016,6 +1016,27 @@ dummy_func(void) {
new_frame = PyJitRef_WrapInvalid(f);
}
// Optimizer rule for _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME: replaces the
// symbolic `owner` with an abstract frame for the `getattribute` function,
// whose first two locals are `owner` and a constant symbol for the name.
op(_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME, (func_version/2, getattribute/4, owner -- new_frame)) {
PyFunctionObject *func = (PyFunctionObject *)getattribute;
// Stop and flag a contradiction when the owner's symbolic type version is 0
// or the function's version no longer equals the operand.
// NOTE(review): a type version of 0 presumably means "not known" -- confirm.
if (sym_get_type_version(owner) == 0 ||
func->func_version != func_version) {
ctx->contradiction = true;
ctx->done = true;
break;
}
// Add the function to the `dependencies` bloom filter.
// NOTE(review): presumably so that changes to it invalidate the trace -- confirm.
_Py_BloomFilter_Add(dependencies, func);
PyCodeObject *co = (PyCodeObject *)func->func_code;
_Py_UOpsAbstractFrame *f = frame_new(ctx, co, NULL, 0);
// frame_new() returned NULL: leave without assigning new_frame.
if (f == NULL) {
break;
}
// Same name index as the runtime uop uses: oparg >> 1.
PyObject *name = get_co_name(ctx, oparg >> 1);
f->locals[0] = owner;
f->locals[1] = sym_new_const(ctx, name);
f->func = func;
new_frame = PyJitRef_WrapInvalid(f);
}
op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, self_or_null, unused[oparg] -- callable, self_or_null, unused[oparg])) {
PyObject *bound_method = sym_get_probable_value(callable);
if (bound_method != NULL && Py_TYPE(bound_method) == &PyMethod_Type) {

View file

@ -2636,7 +2636,33 @@
break;
}
/* _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN is not a viable micro-op for tier 2 */
// Optimizer case for _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME: reads the
// symbolic `owner` from the top of the abstract stack and overwrites that
// slot with an abstract frame for the `getattribute` function.
case _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN_FRAME: {
JitOptRef owner;
JitOptRef new_frame;
owner = stack_pointer[-1];
// Operand 0 is the expected function version; operand 1 is the function.
uint32_t func_version = (uint32_t)this_instr->operand0;
PyObject *getattribute = (PyObject *)this_instr->operand1;
PyFunctionObject *func = (PyFunctionObject *)getattribute;
// Stop and flag a contradiction when the owner's symbolic type version is 0
// or the function's version no longer equals the operand.
// NOTE(review): a type version of 0 presumably means "not known" -- confirm.
if (sym_get_type_version(owner) == 0 ||
func->func_version != func_version) {
ctx->contradiction = true;
ctx->done = true;
break;
}
// Add the function to the `dependencies` bloom filter.
// NOTE(review): presumably so that changes to it invalidate the trace -- confirm.
_Py_BloomFilter_Add(dependencies, func);
PyCodeObject *co = (PyCodeObject *)func->func_code;
_Py_UOpsAbstractFrame *f = frame_new(ctx, co, NULL, 0);
// frame_new() returned NULL: leave the case without touching the stack slot.
if (f == NULL) {
break;
}
// Same name index as the runtime uop uses: oparg >> 1.
PyObject *name = get_co_name(ctx, oparg >> 1);
f->locals[0] = owner;
f->locals[1] = sym_new_const(ctx, name);
f->func = func;
new_frame = PyJitRef_WrapInvalid(f);
// Net stack effect is zero: the owner slot now holds the frame reference.
stack_pointer[-1] = new_frame;
break;
}
case _GUARD_DORV_NO_DICT: {
break;

View file

@ -110,6 +110,7 @@ const _PyOpcodeRecordEntry _PyOpcode_RecordEntries[256] = {
[LOAD_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_PROPERTY] = {1, {_RECORD_TOS_TYPE_INDEX}},
[LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = {1, {_RECORD_TOS_TYPE_INDEX}},
[STORE_ATTR_INSTANCE_VALUE] = {1, {_RECORD_TOS_TYPE_INDEX}},
[STORE_ATTR_WITH_HINT] = {1, {_RECORD_TOS_TYPE_INDEX}},
[STORE_ATTR_SLOT] = {1, {_RECORD_TOS_TYPE_INDEX}},