GH-131798: Narrow types more aggressively in the JIT (GH-134373)

This commit is contained in:
Brandt Bucher 2025-05-20 18:09:51 -04:00 committed by GitHub
parent e1c0c451a2
commit 2f0570caf4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 180 additions and 88 deletions

View file

@ -270,67 +270,68 @@ extern "C" {
#define _POP_TOP POP_TOP #define _POP_TOP POP_TOP
#define _POP_TOP_LOAD_CONST_INLINE 486 #define _POP_TOP_LOAD_CONST_INLINE 486
#define _POP_TOP_LOAD_CONST_INLINE_BORROW 487 #define _POP_TOP_LOAD_CONST_INLINE_BORROW 487
#define _POP_TWO_LOAD_CONST_INLINE_BORROW 488 #define _POP_TWO 488
#define _POP_TWO_LOAD_CONST_INLINE_BORROW 489
#define _PUSH_EXC_INFO PUSH_EXC_INFO #define _PUSH_EXC_INFO PUSH_EXC_INFO
#define _PUSH_FRAME 489 #define _PUSH_FRAME 490
#define _PUSH_NULL PUSH_NULL #define _PUSH_NULL PUSH_NULL
#define _PUSH_NULL_CONDITIONAL 490 #define _PUSH_NULL_CONDITIONAL 491
#define _PY_FRAME_GENERAL 491 #define _PY_FRAME_GENERAL 492
#define _PY_FRAME_KW 492 #define _PY_FRAME_KW 493
#define _QUICKEN_RESUME 493 #define _QUICKEN_RESUME 494
#define _REPLACE_WITH_TRUE 494 #define _REPLACE_WITH_TRUE 495
#define _RESUME_CHECK RESUME_CHECK #define _RESUME_CHECK RESUME_CHECK
#define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_GENERATOR RETURN_GENERATOR
#define _RETURN_VALUE RETURN_VALUE #define _RETURN_VALUE RETURN_VALUE
#define _SAVE_RETURN_OFFSET 495 #define _SAVE_RETURN_OFFSET 496
#define _SEND 496 #define _SEND 497
#define _SEND_GEN_FRAME 497 #define _SEND_GEN_FRAME 498
#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
#define _SET_ADD SET_ADD #define _SET_ADD SET_ADD
#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
#define _SET_UPDATE SET_UPDATE #define _SET_UPDATE SET_UPDATE
#define _START_EXECUTOR 498 #define _START_EXECUTOR 499
#define _STORE_ATTR 499 #define _STORE_ATTR 500
#define _STORE_ATTR_INSTANCE_VALUE 500 #define _STORE_ATTR_INSTANCE_VALUE 501
#define _STORE_ATTR_SLOT 501 #define _STORE_ATTR_SLOT 502
#define _STORE_ATTR_WITH_HINT 502 #define _STORE_ATTR_WITH_HINT 503
#define _STORE_DEREF STORE_DEREF #define _STORE_DEREF STORE_DEREF
#define _STORE_FAST 503 #define _STORE_FAST 504
#define _STORE_FAST_0 504 #define _STORE_FAST_0 505
#define _STORE_FAST_1 505 #define _STORE_FAST_1 506
#define _STORE_FAST_2 506 #define _STORE_FAST_2 507
#define _STORE_FAST_3 507 #define _STORE_FAST_3 508
#define _STORE_FAST_4 508 #define _STORE_FAST_4 509
#define _STORE_FAST_5 509 #define _STORE_FAST_5 510
#define _STORE_FAST_6 510 #define _STORE_FAST_6 511
#define _STORE_FAST_7 511 #define _STORE_FAST_7 512
#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
#define _STORE_GLOBAL STORE_GLOBAL #define _STORE_GLOBAL STORE_GLOBAL
#define _STORE_NAME STORE_NAME #define _STORE_NAME STORE_NAME
#define _STORE_SLICE 512 #define _STORE_SLICE 513
#define _STORE_SUBSCR 513 #define _STORE_SUBSCR 514
#define _STORE_SUBSCR_DICT 514 #define _STORE_SUBSCR_DICT 515
#define _STORE_SUBSCR_LIST_INT 515 #define _STORE_SUBSCR_LIST_INT 516
#define _SWAP SWAP #define _SWAP SWAP
#define _TIER2_RESUME_CHECK 516 #define _TIER2_RESUME_CHECK 517
#define _TO_BOOL 517 #define _TO_BOOL 518
#define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_BOOL TO_BOOL_BOOL
#define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_INT TO_BOOL_INT
#define _TO_BOOL_LIST 518 #define _TO_BOOL_LIST 519
#define _TO_BOOL_NONE TO_BOOL_NONE #define _TO_BOOL_NONE TO_BOOL_NONE
#define _TO_BOOL_STR 519 #define _TO_BOOL_STR 520
#define _UNARY_INVERT UNARY_INVERT #define _UNARY_INVERT UNARY_INVERT
#define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NEGATIVE UNARY_NEGATIVE
#define _UNARY_NOT UNARY_NOT #define _UNARY_NOT UNARY_NOT
#define _UNPACK_EX UNPACK_EX #define _UNPACK_EX UNPACK_EX
#define _UNPACK_SEQUENCE 520 #define _UNPACK_SEQUENCE 521
#define _UNPACK_SEQUENCE_LIST 521 #define _UNPACK_SEQUENCE_LIST 522
#define _UNPACK_SEQUENCE_TUPLE 522 #define _UNPACK_SEQUENCE_TUPLE 523
#define _UNPACK_SEQUENCE_TWO_TUPLE 523 #define _UNPACK_SEQUENCE_TWO_TUPLE 524
#define _WITH_EXCEPT_START WITH_EXCEPT_START #define _WITH_EXCEPT_START WITH_EXCEPT_START
#define _YIELD_VALUE YIELD_VALUE #define _YIELD_VALUE YIELD_VALUE
#define MAX_UOP_ID 523 #define MAX_UOP_ID 524
#ifdef __cplusplus #ifdef __cplusplus
} }

View file

@ -63,6 +63,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
[_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG,
[_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG,
[_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
[_POP_TWO] = HAS_ESCAPES_FLAG,
[_PUSH_NULL] = HAS_PURE_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG,
[_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG,
[_END_SEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_END_SEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG,
@ -557,6 +558,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
[_POP_TOP] = "_POP_TOP", [_POP_TOP] = "_POP_TOP",
[_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE",
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW",
[_POP_TWO] = "_POP_TWO",
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW",
[_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO",
[_PUSH_FRAME] = "_PUSH_FRAME", [_PUSH_FRAME] = "_PUSH_FRAME",
@ -708,6 +710,8 @@ int _PyUop_num_popped(int opcode, int oparg)
return 2; return 2;
case _POP_TOP: case _POP_TOP:
return 1; return 1;
case _POP_TWO:
return 2;
case _PUSH_NULL: case _PUSH_NULL:
return 0; return 0;
case _END_FOR: case _END_FOR:

View file

@ -2137,6 +2137,25 @@ def testfunc(n):
self.assertNotIn("_TO_BOOL_BOOL", uops) self.assertNotIn("_TO_BOOL_BOOL", uops)
self.assertIn("_GUARD_IS_TRUE_POP", uops) self.assertIn("_GUARD_IS_TRUE_POP", uops)
def test_set_type_version_sets_type(self):
# Checks that a type-version guard on an instance also tells the optimizer
# what type(instance) is, so a later class-attribute load through
# type(c) needs no separate class check.
class C:
A = 1
def testfunc(n):
x = 0
c = C()
for _ in range(n):
x += c.A # Guarded.
x += type(c).A # Unguarded!
return x
res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
# Each iteration adds C.A (== 1) twice.
self.assertEqual(res, 2 * TIER2_THRESHOLD)
self.assertIsNotNone(ex)
uops = get_opnames(ex)
# The instance attribute load keeps its guard...
self.assertIn("_GUARD_TYPE_VERSION", uops)
# ...but the class attribute load's check must have been removed.
self.assertNotIn("_CHECK_ATTR_CLASS", uops)
def global_identity(x): def global_identity(x):
return x return x

View file

@ -0,0 +1 @@
Improve the JIT's ability to narrow unknown classes to constant values.

View file

@ -344,6 +344,11 @@ dummy_func(
PyStackRef_CLOSE(value); PyStackRef_CLOSE(value);
} }
// Tier 2 uop: pops the two topmost stack entries and pushes nothing.
// `tos` is the top entry and `nos` the one beneath it; the top entry is
// closed first.
tier2 op(_POP_TWO, (nos, tos --)) {
PyStackRef_CLOSE(tos);
PyStackRef_CLOSE(nos);
}
pure inst(PUSH_NULL, (-- res)) { pure inst(PUSH_NULL, (-- res)) {
res = PyStackRef_NULL; res = PyStackRef_NULL;
} }

View file

@ -539,6 +539,24 @@
break; break;
} }
/* Executor body for _POP_TWO: release the top two stack entries, top first.
 * The stack pointer is lowered one slot at a time, and is written to the
 * frame before each close and reloaded after it.
 * NOTE(review): presumably this is because closing a reference can run
 * arbitrary code (the uop is flagged HAS_ESCAPES_FLAG) -- confirm. */
case _POP_TWO: {
_PyStackRef tos;
_PyStackRef nos;
tos = stack_pointer[-1];
nos = stack_pointer[-2];
/* Drop `tos` from the stack before closing it. */
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(tos);
stack_pointer = _PyFrame_GetStackPointer(frame);
/* Same sequence for `nos`. */
stack_pointer += -1;
assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(nos);
stack_pointer = _PyFrame_GetStackPointer(frame);
break;
}
case _PUSH_NULL: { case _PUSH_NULL: {
_PyStackRef res; _PyStackRef res;
res = PyStackRef_NULL; res = PyStackRef_NULL;

View file

@ -523,6 +523,25 @@ optimize_uops(
} }
// Indexed by uop ID. For a uop that pushes one value, gives the uop to use
// instead once that push has been cancelled out (for example, a pure load
// becomes _NOP, and "pop then load" becomes just the pop).
// Uops not listed here are zero-initialized; remove_unneeded_uops() treats a
// zero entry as "this uop has no push to remove".
// NOTE(review): that zero test only works if _NOP itself is nonzero -- confirm.
// NOTE(review): file-scope and not `static`; confirm whether external linkage
// is intended.
const uint16_t op_without_push[MAX_UOP_ID + 1] = {
[_COPY] = _NOP,
[_LOAD_CONST_INLINE] = _NOP,
[_LOAD_CONST_INLINE_BORROW] = _NOP,
[_LOAD_FAST] = _NOP,
[_LOAD_FAST_BORROW] = _NOP,
[_LOAD_SMALL_INT] = _NOP,
[_POP_TOP_LOAD_CONST_INLINE] = _POP_TOP,
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = _POP_TOP,
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TWO,
};
// Indexed by uop ID. For a uop that pops at least one value, gives the uop to
// use instead once one of its pops has been cancelled out (for example,
// _POP_TOP becomes _NOP, and a two-pop load becomes the one-pop load).
// Uops not listed here are zero-initialized; remove_unneeded_uops() loops
// only while the entry for the current opcode is nonzero.
// NOTE(review): that zero test only works if _NOP itself is nonzero -- confirm.
const uint16_t op_without_pop[MAX_UOP_ID + 1] = {
[_POP_TOP] = _NOP,
[_POP_TOP_LOAD_CONST_INLINE] = _LOAD_CONST_INLINE,
[_POP_TOP_LOAD_CONST_INLINE_BORROW] = _LOAD_CONST_INLINE_BORROW,
[_POP_TWO_LOAD_CONST_INLINE_BORROW] = _POP_TOP_LOAD_CONST_INLINE_BORROW,
};
static int static int
remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
@ -551,50 +570,23 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size)
buffer[pc].opcode = _NOP; buffer[pc].opcode = _NOP;
} }
break; break;
case _POP_TOP:
case _POP_TOP_LOAD_CONST_INLINE:
case _POP_TOP_LOAD_CONST_INLINE_BORROW:
case _POP_TWO_LOAD_CONST_INLINE_BORROW:
optimize_pop_top_again:
{
_PyUOpInstruction *last = &buffer[pc-1];
while (last->opcode == _NOP) {
last--;
}
switch (last->opcode) {
case _POP_TWO_LOAD_CONST_INLINE_BORROW:
last->opcode = _POP_TOP;
break;
case _POP_TOP_LOAD_CONST_INLINE:
case _POP_TOP_LOAD_CONST_INLINE_BORROW:
last->opcode = _NOP;
goto optimize_pop_top_again;
case _COPY:
case _LOAD_CONST_INLINE:
case _LOAD_CONST_INLINE_BORROW:
case _LOAD_FAST:
case _LOAD_FAST_BORROW:
case _LOAD_SMALL_INT:
last->opcode = _NOP;
if (opcode == _POP_TOP) {
opcode = buffer[pc].opcode = _NOP;
}
else if (opcode == _POP_TOP_LOAD_CONST_INLINE) {
opcode = buffer[pc].opcode = _LOAD_CONST_INLINE;
}
else if (opcode == _POP_TOP_LOAD_CONST_INLINE_BORROW) {
opcode = buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW;
}
else {
assert(opcode == _POP_TWO_LOAD_CONST_INLINE_BORROW);
opcode = buffer[pc].opcode = _POP_TOP_LOAD_CONST_INLINE_BORROW;
goto optimize_pop_top_again;
}
}
_Py_FALLTHROUGH;
}
default: default:
{ {
// Cancel out pushes and pops, repeatedly. So:
// _LOAD_FAST + _POP_TWO_LOAD_CONST_INLINE_BORROW + _POP_TOP
// ...becomes:
// _NOP + _POP_TOP + _NOP
while (op_without_pop[opcode]) {
_PyUOpInstruction *last = &buffer[pc - 1];
while (last->opcode == _NOP) {
last--;
}
if (!op_without_push[last->opcode]) {
break;
}
last->opcode = op_without_push[last->opcode];
opcode = buffer[pc].opcode = op_without_pop[opcode];
}
/* _PUSH_FRAME doesn't escape or error, but it /* _PUSH_FRAME doesn't escape or error, but it
* does need the IP for the return address */ * does need the IP for the return address */
bool needs_ip = opcode == _PUSH_FRAME; bool needs_ip = opcode == _PUSH_FRAME;

View file

@ -118,6 +118,18 @@ dummy_func(void) {
sym_set_type(left, &PyLong_Type); sym_set_type(left, &PyLong_Type);
} }
// Optimizer rule for _CHECK_ATTR_CLASS. `owner` stays on the stack unchanged.
// Looks up the type object for `type_version`; if none is found, nothing is
// learned and the instruction is left as it is.
op(_CHECK_ATTR_CLASS, (type_version/2, owner -- owner)) {
PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version);
if (type) {
if (type == sym_get_const(ctx, owner)) {
// `owner` is already known to be exactly this object, so the
// check is redundant and the instruction becomes a no-op.
REPLACE_OP(this_instr, _NOP, 0, 0);
}
else {
// Record `owner` as the constant `type` for later instructions.
sym_set_const(owner, type);
}
}
}
op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) { op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
assert(type_version); assert(type_version);
if (sym_matches_type_version(owner, type_version)) { if (sym_matches_type_version(owner, type_version)) {

View file

@ -102,6 +102,12 @@
break; break;
} }
/* Optimizer handling of _POP_TWO: only the tracked stack depth changes;
 * the two topmost entries are dropped. */
case _POP_TWO: {
stack_pointer += -2;
assert(WITHIN_STACK_BOUNDS());
break;
}
case _PUSH_NULL: { case _PUSH_NULL: {
JitOptSymbol *res; JitOptSymbol *res;
res = sym_new_null(ctx); res = sym_new_null(ctx);
@ -1259,6 +1265,18 @@
} }
case _CHECK_ATTR_CLASS: { case _CHECK_ATTR_CLASS: {
JitOptSymbol *owner;
owner = stack_pointer[-1];
uint32_t type_version = (uint32_t)this_instr->operand0;
PyObject *type = (PyObject *)_PyType_LookupByVersion(type_version);
if (type) {
if (type == sym_get_const(ctx, owner)) {
REPLACE_OP(this_instr, _NOP, 0, 0);
}
else {
sym_set_const(owner, type);
}
}
break; break;
} }

View file

@ -200,6 +200,10 @@ _Py_uop_sym_set_type(JitOptContext *ctx, JitOptSymbol *sym, PyTypeObject *typ)
bool bool
_Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version) _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int version)
{ {
PyTypeObject *type = _PyType_LookupByVersion(version);
if (type) {
_Py_uop_sym_set_type(ctx, sym, type);
}
JitSymType tag = sym->tag; JitSymType tag = sym->tag;
switch(tag) { switch(tag) {
case JIT_SYM_NULL_TAG: case JIT_SYM_NULL_TAG:
@ -215,18 +219,24 @@ _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptSymbol *sym, unsigned int
return true; return true;
} }
case JIT_SYM_KNOWN_VALUE_TAG: case JIT_SYM_KNOWN_VALUE_TAG:
Py_CLEAR(sym->value.value); if (Py_TYPE(sym->value.value)->tp_version_tag != version) {
sym_set_bottom(ctx, sym); Py_CLEAR(sym->value.value);
return false; sym_set_bottom(ctx, sym);
return false;
};
return true;
case JIT_SYM_TUPLE_TAG: case JIT_SYM_TUPLE_TAG:
sym_set_bottom(ctx, sym); if (PyTuple_Type.tp_version_tag != version) {
return false; sym_set_bottom(ctx, sym);
return false;
};
return true;
case JIT_SYM_TYPE_VERSION_TAG: case JIT_SYM_TYPE_VERSION_TAG:
if (sym->version.version == version) { if (sym->version.version != version) {
return true; sym_set_bottom(ctx, sym);
return false;
} }
sym_set_bottom(ctx, sym); return true;
return false;
case JIT_SYM_BOTTOM_TAG: case JIT_SYM_BOTTOM_TAG:
return false; return false;
case JIT_SYM_NON_NULL_TAG: case JIT_SYM_NON_NULL_TAG:
@ -266,6 +276,18 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val
} }
return; return;
case JIT_SYM_TUPLE_TAG: case JIT_SYM_TUPLE_TAG:
if (PyTuple_CheckExact(const_val)) {
Py_ssize_t len = _Py_uop_sym_tuple_length(sym);
if (len == PyTuple_GET_SIZE(const_val)) {
for (Py_ssize_t i = 0; i < len; i++) {
JitOptSymbol *sym_item = _Py_uop_sym_tuple_getitem(ctx, sym, i);
PyObject *item = PyTuple_GET_ITEM(const_val, i);
_Py_uop_sym_set_const(ctx, sym_item, item);
}
make_const(sym, const_val);
return;
}
}
sym_set_bottom(ctx, sym); sym_set_bottom(ctx, sym);
return; return;
case JIT_SYM_TYPE_VERSION_TAG: case JIT_SYM_TYPE_VERSION_TAG: