mirror of
https://github.com/python/cpython.git
synced 2026-05-04 09:31:02 +00:00
gh-146455: Fix O(N²) in add_const() after constant folding moved to CFG (#146456)
The add_const() function in flowgraph.c uses a linear search over the consts list to find the index of a constant. After gh-126835 moved constant folding from the AST optimizer to the CFG optimizer, this function is now called N times for N inner tuple elements during fold_tuple_of_constants(), resulting in O(N²) total time. Fix by maintaining an auxiliary _Py_hashtable_t that maps object pointers to their indices in the consts list, providing O(1) lookup. For a file with 100,000 constant 2-tuples: - Before: 10.38s (add_const occupies 83.76% of CPU time) - After: 1.48s
This commit is contained in:
parent
6d4ca16f47
commit
5d416324c5
2 changed files with 89 additions and 43 deletions
|
|
@ -0,0 +1 @@
|
|||
Fix O(N²) compile-time regression in constant folding after it was moved from AST to CFG optimizer.
|
||||
|
|
@ -6,6 +6,7 @@
|
|||
#include "pycore_intrinsics.h"
|
||||
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
|
||||
#include "pycore_long.h" // _PY_IS_SMALL_INT()
|
||||
#include "pycore_hashtable.h" // _Py_hashtable_t
|
||||
|
||||
#include "pycore_opcode_utils.h"
|
||||
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
|
||||
|
|
@ -1333,30 +1334,38 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
|
|||
|
||||
// Steals a reference to newconst.
|
||||
static int
|
||||
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
|
||||
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
|
||||
_Py_hashtable_t *consts_index)
|
||||
{
|
||||
if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
|
||||
Py_DECREF(newconst);
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t index;
|
||||
for (index = 0; index < PyList_GET_SIZE(consts); index++) {
|
||||
if (PyList_GET_ITEM(consts, index) == newconst) {
|
||||
break;
|
||||
}
|
||||
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
|
||||
if (entry != NULL) {
|
||||
Py_DECREF(newconst);
|
||||
return (int)(uintptr_t)entry->value;
|
||||
}
|
||||
if (index == PyList_GET_SIZE(consts)) {
|
||||
if ((size_t)index >= (size_t)INT_MAX - 1) {
|
||||
PyErr_SetString(PyExc_OverflowError, "too many constants");
|
||||
Py_DECREF(newconst);
|
||||
return -1;
|
||||
}
|
||||
if (PyList_Append(consts, newconst)) {
|
||||
Py_DECREF(newconst);
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_ssize_t index = PyList_GET_SIZE(consts);
|
||||
if ((size_t)index >= (size_t)INT_MAX - 1) {
|
||||
PyErr_SetString(PyExc_OverflowError, "too many constants");
|
||||
Py_DECREF(newconst);
|
||||
return -1;
|
||||
}
|
||||
if (PyList_Append(consts, newconst)) {
|
||||
Py_DECREF(newconst);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
|
||||
PyList_SetSlice(consts, index, index + 1, NULL);
|
||||
Py_DECREF(newconst);
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
||||
Py_DECREF(newconst);
|
||||
return (int)index;
|
||||
}
|
||||
|
|
@ -1432,7 +1441,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
|
|||
/* Steals reference to "newconst" */
|
||||
static int
|
||||
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
|
||||
PyObject *consts, PyObject *const_cache)
|
||||
PyObject *consts, PyObject *const_cache,
|
||||
_Py_hashtable_t *consts_index)
|
||||
{
|
||||
int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
|
||||
if (res < 0) {
|
||||
|
|
@ -1442,7 +1452,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
|
|||
if (res > 0) {
|
||||
return SUCCESS;
|
||||
}
|
||||
int oparg = add_const(newconst, consts, const_cache);
|
||||
int oparg = add_const(newconst, consts, const_cache, consts_index);
|
||||
RETURN_IF_ERROR(oparg);
|
||||
INSTR_SET_OP1(instr, LOAD_CONST, oparg);
|
||||
return SUCCESS;
|
||||
|
|
@ -1455,7 +1465,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
|
|||
Called with codestr pointing to the first LOAD_CONST.
|
||||
*/
|
||||
static int
|
||||
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
|
||||
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
|
||||
PyObject *const_cache, _Py_hashtable_t *consts_index)
|
||||
{
|
||||
/* Pre-conditions */
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
|
|
@ -1492,7 +1503,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
|
|||
}
|
||||
|
||||
nop_out(const_instrs, seq_size);
|
||||
return instr_make_load_const(instr, const_tuple, consts, const_cache);
|
||||
return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
|
||||
}
|
||||
|
||||
/* Replace:
|
||||
|
|
@ -1510,7 +1521,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
|
|||
*/
|
||||
static int
|
||||
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
|
||||
PyObject *consts, PyObject *const_cache)
|
||||
PyObject *consts, PyObject *const_cache,
|
||||
_Py_hashtable_t *consts_index)
|
||||
{
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
assert(PyList_CheckExact(consts));
|
||||
|
|
@ -1562,7 +1574,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
|
|||
nop_out(&instr, 1);
|
||||
}
|
||||
assert(consts_found == 0);
|
||||
return instr_make_load_const(intrinsic, newconst, consts, const_cache);
|
||||
return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
|
||||
}
|
||||
|
||||
if (expect_append) {
|
||||
|
|
@ -1598,7 +1610,8 @@ Optimize lists and sets for:
|
|||
*/
|
||||
static int
|
||||
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
|
||||
PyObject *consts, PyObject *const_cache)
|
||||
PyObject *consts, PyObject *const_cache,
|
||||
_Py_hashtable_t *consts_index)
|
||||
{
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
assert(PyList_CheckExact(consts));
|
||||
|
|
@ -1648,7 +1661,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
|
|||
Py_SETREF(const_result, frozenset);
|
||||
}
|
||||
|
||||
int index = add_const(const_result, consts, const_cache);
|
||||
int index = add_const(const_result, consts, const_cache, consts_index);
|
||||
RETURN_IF_ERROR(index);
|
||||
nop_out(const_instrs, seq_size);
|
||||
|
||||
|
|
@ -1845,7 +1858,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
|
|||
}
|
||||
|
||||
static int
|
||||
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
|
||||
fold_const_binop(basicblock *bb, int i, PyObject *consts,
|
||||
PyObject *const_cache, _Py_hashtable_t *consts_index)
|
||||
{
|
||||
#define BINOP_OPERAND_COUNT 2
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
|
|
@ -1887,7 +1901,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
|
|||
}
|
||||
|
||||
nop_out(operands_instrs, BINOP_OPERAND_COUNT);
|
||||
return instr_make_load_const(binop, newconst, consts, const_cache);
|
||||
return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
|
|
@ -1933,7 +1947,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
|
|||
}
|
||||
|
||||
static int
|
||||
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
|
||||
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
|
||||
PyObject *const_cache, _Py_hashtable_t *consts_index)
|
||||
{
|
||||
#define UNARYOP_OPERAND_COUNT 1
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
|
|
@ -1970,7 +1985,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
|
|||
assert(PyBool_Check(newconst));
|
||||
}
|
||||
nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
|
||||
return instr_make_load_const(unaryop, newconst, consts, const_cache);
|
||||
return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
|
||||
}
|
||||
|
||||
#define VISITED (-1)
|
||||
|
|
@ -2165,7 +2180,8 @@ apply_static_swaps(basicblock *block, int i)
|
|||
}
|
||||
|
||||
static int
|
||||
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
|
||||
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
|
||||
PyObject *consts, _Py_hashtable_t *consts_index)
|
||||
{
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
assert(PyList_CheckExact(consts));
|
||||
|
|
@ -2283,7 +2299,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
|
|||
return ERROR;
|
||||
}
|
||||
cnt = PyBool_FromLong(is_true);
|
||||
int index = add_const(cnt, consts, const_cache);
|
||||
int index = add_const(cnt, consts, const_cache, consts_index);
|
||||
if (index < 0) {
|
||||
return ERROR;
|
||||
}
|
||||
|
|
@ -2297,15 +2313,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
|
|||
}
|
||||
|
||||
static int
|
||||
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
|
||||
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
|
||||
_Py_hashtable_t *consts_index) {
|
||||
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
|
||||
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
|
||||
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
|
||||
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
|
||||
_Py_hashtable_t *consts_index)
|
||||
{
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
assert(PyList_CheckExact(consts));
|
||||
|
|
@ -2345,11 +2363,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
|
|||
continue;
|
||||
}
|
||||
}
|
||||
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
|
||||
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
|
||||
break;
|
||||
case BUILD_LIST:
|
||||
case BUILD_SET:
|
||||
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
|
||||
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
|
||||
break;
|
||||
case POP_JUMP_IF_NOT_NONE:
|
||||
case POP_JUMP_IF_NONE:
|
||||
|
|
@ -2484,7 +2502,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
|
|||
_Py_FALLTHROUGH;
|
||||
case UNARY_INVERT:
|
||||
case UNARY_NEGATIVE:
|
||||
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
|
||||
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
|
||||
break;
|
||||
case CALL_INTRINSIC_1:
|
||||
if (oparg == INTRINSIC_LIST_TO_TUPLE) {
|
||||
|
|
@ -2492,15 +2510,15 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
|
|||
INSTR_SET_OP0(inst, NOP);
|
||||
}
|
||||
else {
|
||||
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
|
||||
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
|
||||
}
|
||||
}
|
||||
else if (oparg == INTRINSIC_UNARY_POSITIVE) {
|
||||
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
|
||||
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
|
||||
}
|
||||
break;
|
||||
case BINARY_OP:
|
||||
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
|
||||
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -2545,16 +2563,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
|
|||
NOPs. Later those NOPs are removed.
|
||||
*/
|
||||
static int
|
||||
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
|
||||
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
|
||||
_Py_hashtable_t *consts_index, int firstlineno)
|
||||
{
|
||||
assert(PyDict_CheckExact(const_cache));
|
||||
RETURN_IF_ERROR(check_cfg(g));
|
||||
RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
|
||||
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
|
||||
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
|
||||
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
|
||||
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
|
||||
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
|
||||
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
|
||||
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
|
||||
}
|
||||
RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
|
||||
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
|
||||
|
|
@ -3674,7 +3693,33 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
|
|||
RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
|
||||
|
||||
/** Optimization **/
|
||||
RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
|
||||
|
||||
_Py_hashtable_t *consts_index = _Py_hashtable_new(
|
||||
_Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
|
||||
if (consts_index == NULL) {
|
||||
PyErr_NoMemory();
|
||||
return ERROR;
|
||||
}
|
||||
|
||||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
|
||||
PyObject *item = PyList_GET_ITEM(consts, i);
|
||||
if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
|
||||
continue;
|
||||
}
|
||||
if (_Py_hashtable_set(consts_index, (void *)item,
|
||||
(void *)(uintptr_t)i) < 0) {
|
||||
_Py_hashtable_destroy(consts_index);
|
||||
PyErr_NoMemory();
|
||||
return ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
|
||||
|
||||
_Py_hashtable_destroy(consts_index);
|
||||
|
||||
RETURN_IF_ERROR(ret);
|
||||
|
||||
RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
|
||||
RETURN_IF_ERROR(
|
||||
add_checks_for_loads_of_uninitialized_variables(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue