mirror of
https://github.com/python/cpython.git
synced 2026-01-03 05:54:03 +00:00
GH-139757: JIT: Remove redundant branches to jumps in the assembly optimizer (GH-140800)
JIT: Remove redundant branches to jumps in the assembly optimizer
* Refactor the JIT assembly optimizer so that instructions are instances, not just strings
* Remove redundant jumps and branches where legal to do so
* Modify _BINARY_OP_SUBSCR_STR_INT to avoid excessive inlining depth
This commit is contained in:
parent
37988c57ea
commit
e0451ceef8
8 changed files with 220 additions and 74 deletions
|
|
@ -982,9 +982,10 @@ dummy_func(
|
|||
DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub));
|
||||
Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0];
|
||||
DEOPT_IF(PyUnicode_GET_LENGTH(str) <= index);
|
||||
// Specialize for reading an ASCII character from any string:
|
||||
Py_UCS4 c = PyUnicode_READ_CHAR(str, index);
|
||||
DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c);
|
||||
// Specialize for reading an ASCII character from an ASCII string:
|
||||
DEOPT_IF(!PyUnicode_IS_COMPACT_ASCII(str));
|
||||
uint8_t c = PyUnicode_1BYTE_DATA(str)[index];
|
||||
assert(c < 128);
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c];
|
||||
PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc);
|
||||
|
|
|
|||
5
Python/executor_cases.c.h
generated
5
Python/executor_cases.c.h
generated
|
|
@ -1502,11 +1502,12 @@
|
|||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
Py_UCS4 c = PyUnicode_READ_CHAR(str, index);
|
||||
if (Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c) {
|
||||
if (!PyUnicode_IS_COMPACT_ASCII(str)) {
|
||||
UOP_STAT_INC(uopcode, miss);
|
||||
JUMP_TO_JUMP_TARGET();
|
||||
}
|
||||
uint8_t c = PyUnicode_1BYTE_DATA(str)[index];
|
||||
assert(c < 128);
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c];
|
||||
PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc);
|
||||
|
|
|
|||
5
Python/generated_cases.c.h
generated
5
Python/generated_cases.c.h
generated
|
|
@ -892,12 +892,13 @@
|
|||
assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
|
||||
JUMP_TO_PREDICTED(BINARY_OP);
|
||||
}
|
||||
Py_UCS4 c = PyUnicode_READ_CHAR(str, index);
|
||||
if (Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c) {
|
||||
if (!PyUnicode_IS_COMPACT_ASCII(str)) {
|
||||
UPDATE_MISS_STATS(BINARY_OP);
|
||||
assert(_PyOpcode_Deopt[opcode] == (BINARY_OP));
|
||||
JUMP_TO_PREDICTED(BINARY_OP);
|
||||
}
|
||||
uint8_t c = PyUnicode_1BYTE_DATA(str)[index];
|
||||
assert(c < 128);
|
||||
STAT_INC(BINARY_OP, hit);
|
||||
PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c];
|
||||
PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc);
|
||||
|
|
|
|||
|
|
@ -185,6 +185,7 @@ set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start,
|
|||
#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000)
|
||||
#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000)
|
||||
#define IS_AARCH64_BRANCH_COND(I) (((I) & 0x7C000000) == 0x54000000)
|
||||
#define IS_AARCH64_BRANCH_ZERO(I) (((I) & 0x7E000000) == 0x34000000)
|
||||
#define IS_AARCH64_TEST_AND_BRANCH(I) (((I) & 0x7E000000) == 0x36000000)
|
||||
#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000)
|
||||
#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000)
|
||||
|
|
@ -352,7 +353,7 @@ void
|
|||
patch_aarch64_19r(unsigned char *location, uint64_t value)
|
||||
{
|
||||
uint32_t *loc32 = (uint32_t *)location;
|
||||
assert(IS_AARCH64_BRANCH_COND(*loc32));
|
||||
assert(IS_AARCH64_BRANCH_COND(*loc32) || IS_AARCH64_BRANCH_ZERO(*loc32));
|
||||
value -= (uintptr_t)location;
|
||||
// Check that we're not out of range of 21 signed bits:
|
||||
assert((int64_t)value >= -(1 << 20));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue