gh-146455: Fix O(N²) in add_const() after constant folding moved to CFG (#146456)

The add_const() function in flowgraph.c uses a linear search over the
consts list to find the index of a constant. After gh-126835 moved
constant folding from the AST optimizer to the CFG optimizer, this
function is now called N times for N inner tuple elements during
fold_tuple_of_constants(), resulting in O(N²) total time.

Fix by maintaining an auxiliary _Py_hashtable_t that maps object
pointers to their indices in the consts list, providing O(1) lookup.

For a file with 100,000 constant 2-tuples:
- Before: 10.38s (add_const occupies 83.76% of CPU time)
- After:  1.48s
This commit is contained in:
zSirius 2026-04-26 20:15:24 +08:00 committed by GitHub
parent 6d4ca16f47
commit 5d416324c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 89 additions and 43 deletions

View file

@ -6,6 +6,7 @@
#include "pycore_intrinsics.h"
#include "pycore_pymem.h" // _PyMem_IsPtrFreed()
#include "pycore_long.h" // _PY_IS_SMALL_INT()
#include "pycore_hashtable.h" // _Py_hashtable_t
#include "pycore_opcode_utils.h"
#include "pycore_opcode_metadata.h" // OPCODE_HAS_ARG, etc
@ -1333,30 +1334,38 @@ get_const_value(int opcode, int oparg, PyObject *co_consts)
// Steals a reference to newconst.
static int
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache)
add_const(PyObject *newconst, PyObject *consts, PyObject *const_cache,
_Py_hashtable_t *consts_index)
{
if (_PyCompile_ConstCacheMergeOne(const_cache, &newconst) < 0) {
Py_DECREF(newconst);
return -1;
}
Py_ssize_t index;
for (index = 0; index < PyList_GET_SIZE(consts); index++) {
if (PyList_GET_ITEM(consts, index) == newconst) {
break;
}
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(consts_index, (void *)newconst);
if (entry != NULL) {
Py_DECREF(newconst);
return (int)(uintptr_t)entry->value;
}
if (index == PyList_GET_SIZE(consts)) {
if ((size_t)index >= (size_t)INT_MAX - 1) {
PyErr_SetString(PyExc_OverflowError, "too many constants");
Py_DECREF(newconst);
return -1;
}
if (PyList_Append(consts, newconst)) {
Py_DECREF(newconst);
return -1;
}
Py_ssize_t index = PyList_GET_SIZE(consts);
if ((size_t)index >= (size_t)INT_MAX - 1) {
PyErr_SetString(PyExc_OverflowError, "too many constants");
Py_DECREF(newconst);
return -1;
}
if (PyList_Append(consts, newconst)) {
Py_DECREF(newconst);
return -1;
}
if (_Py_hashtable_set(consts_index, (void *)newconst, (void *)(uintptr_t)index) < 0) {
PyList_SetSlice(consts, index, index + 1, NULL);
Py_DECREF(newconst);
PyErr_NoMemory();
return -1;
}
Py_DECREF(newconst);
return (int)index;
}
@ -1432,7 +1441,8 @@ maybe_instr_make_load_smallint(cfg_instr *instr, PyObject *newconst,
/* Steals reference to "newconst" */
static int
instr_make_load_const(cfg_instr *instr, PyObject *newconst,
PyObject *consts, PyObject *const_cache)
PyObject *consts, PyObject *const_cache,
_Py_hashtable_t *consts_index)
{
int res = maybe_instr_make_load_smallint(instr, newconst, consts, const_cache);
if (res < 0) {
@ -1442,7 +1452,7 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
if (res > 0) {
return SUCCESS;
}
int oparg = add_const(newconst, consts, const_cache);
int oparg = add_const(newconst, consts, const_cache, consts_index);
RETURN_IF_ERROR(oparg);
INSTR_SET_OP1(instr, LOAD_CONST, oparg);
return SUCCESS;
@ -1455,7 +1465,8 @@ instr_make_load_const(cfg_instr *instr, PyObject *newconst,
Called with codestr pointing to the first LOAD_CONST.
*/
static int
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts,
PyObject *const_cache, _Py_hashtable_t *consts_index)
{
/* Pre-conditions */
assert(PyDict_CheckExact(const_cache));
@ -1492,7 +1503,7 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
}
nop_out(const_instrs, seq_size);
return instr_make_load_const(instr, const_tuple, consts, const_cache);
return instr_make_load_const(instr, const_tuple, consts, const_cache, consts_index);
}
/* Replace:
@ -1510,7 +1521,8 @@ fold_tuple_of_constants(basicblock *bb, int i, PyObject *consts, PyObject *const
*/
static int
fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
PyObject *consts, PyObject *const_cache)
PyObject *consts, PyObject *const_cache,
_Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@ -1562,7 +1574,7 @@ fold_constant_intrinsic_list_to_tuple(basicblock *bb, int i,
nop_out(&instr, 1);
}
assert(consts_found == 0);
return instr_make_load_const(intrinsic, newconst, consts, const_cache);
return instr_make_load_const(intrinsic, newconst, consts, const_cache, consts_index);
}
if (expect_append) {
@ -1598,7 +1610,8 @@ Optimize lists and sets for:
*/
static int
optimize_lists_and_sets(basicblock *bb, int i, int nextop,
PyObject *consts, PyObject *const_cache)
PyObject *consts, PyObject *const_cache,
_Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@ -1648,7 +1661,7 @@ optimize_lists_and_sets(basicblock *bb, int i, int nextop,
Py_SETREF(const_result, frozenset);
}
int index = add_const(const_result, consts, const_cache);
int index = add_const(const_result, consts, const_cache, consts_index);
RETURN_IF_ERROR(index);
nop_out(const_instrs, seq_size);
@ -1845,7 +1858,8 @@ eval_const_binop(PyObject *left, int op, PyObject *right)
}
static int
fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
fold_const_binop(basicblock *bb, int i, PyObject *consts,
PyObject *const_cache, _Py_hashtable_t *consts_index)
{
#define BINOP_OPERAND_COUNT 2
assert(PyDict_CheckExact(const_cache));
@ -1887,7 +1901,7 @@ fold_const_binop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
}
nop_out(operands_instrs, BINOP_OPERAND_COUNT);
return instr_make_load_const(binop, newconst, consts, const_cache);
return instr_make_load_const(binop, newconst, consts, const_cache, consts_index);
}
static PyObject *
@ -1933,7 +1947,8 @@ eval_const_unaryop(PyObject *operand, int opcode, int oparg)
}
static int
fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cache)
fold_const_unaryop(basicblock *bb, int i, PyObject *consts,
PyObject *const_cache, _Py_hashtable_t *consts_index)
{
#define UNARYOP_OPERAND_COUNT 1
assert(PyDict_CheckExact(const_cache));
@ -1970,7 +1985,7 @@ fold_const_unaryop(basicblock *bb, int i, PyObject *consts, PyObject *const_cach
assert(PyBool_Check(newconst));
}
nop_out(&operand_instr, UNARYOP_OPERAND_COUNT);
return instr_make_load_const(unaryop, newconst, consts, const_cache);
return instr_make_load_const(unaryop, newconst, consts, const_cache, consts_index);
}
#define VISITED (-1)
@ -2165,7 +2180,8 @@ apply_static_swaps(basicblock *block, int i)
}
static int
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts)
basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb,
PyObject *consts, _Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@ -2283,7 +2299,7 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
return ERROR;
}
cnt = PyBool_FromLong(is_true);
int index = add_const(cnt, consts, const_cache);
int index = add_const(cnt, consts, const_cache, consts_index);
if (index < 0) {
return ERROR;
}
@ -2297,15 +2313,17 @@ basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *
}
static int
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) {
optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts,
_Py_hashtable_t *consts_index) {
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts));
RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts, consts_index));
}
return SUCCESS;
}
static int
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts,
_Py_hashtable_t *consts_index)
{
assert(PyDict_CheckExact(const_cache));
assert(PyList_CheckExact(consts));
@ -2345,11 +2363,11 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
continue;
}
}
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache));
RETURN_IF_ERROR(fold_tuple_of_constants(bb, i, consts, const_cache, consts_index));
break;
case BUILD_LIST:
case BUILD_SET:
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache));
RETURN_IF_ERROR(optimize_lists_and_sets(bb, i, nextop, consts, const_cache, consts_index));
break;
case POP_JUMP_IF_NOT_NONE:
case POP_JUMP_IF_NONE:
@ -2484,7 +2502,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
_Py_FALLTHROUGH;
case UNARY_INVERT:
case UNARY_NEGATIVE:
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
break;
case CALL_INTRINSIC_1:
if (oparg == INTRINSIC_LIST_TO_TUPLE) {
@ -2492,15 +2510,15 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts)
INSTR_SET_OP0(inst, NOP);
}
else {
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache));
RETURN_IF_ERROR(fold_constant_intrinsic_list_to_tuple(bb, i, consts, const_cache, consts_index));
}
}
else if (oparg == INTRINSIC_UNARY_POSITIVE) {
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache));
RETURN_IF_ERROR(fold_const_unaryop(bb, i, consts, const_cache, consts_index));
}
break;
case BINARY_OP:
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache));
RETURN_IF_ERROR(fold_const_binop(bb, i, consts, const_cache, consts_index));
break;
}
}
@ -2545,16 +2563,17 @@ remove_redundant_nops_and_jumps(cfg_builder *g)
NOPs. Later those NOPs are removed.
*/
static int
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno)
optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache,
_Py_hashtable_t *consts_index, int firstlineno)
{
assert(PyDict_CheckExact(const_cache));
RETURN_IF_ERROR(check_cfg(g));
RETURN_IF_ERROR(inline_small_or_no_lineno_blocks(g->g_entryblock));
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno));
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts));
RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts, consts_index));
for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) {
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts));
RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts, consts_index));
}
RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock));
RETURN_IF_ERROR(remove_unreachable(g->g_entryblock));
@ -3674,7 +3693,33 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache,
RETURN_IF_ERROR(label_exception_targets(g->g_entryblock));
/** Optimization **/
RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno));
_Py_hashtable_t *consts_index = _Py_hashtable_new(
_Py_hashtable_hash_ptr, _Py_hashtable_compare_direct);
if (consts_index == NULL) {
PyErr_NoMemory();
return ERROR;
}
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(consts); i++) {
PyObject *item = PyList_GET_ITEM(consts, i);
if (_Py_hashtable_get_entry(consts_index, (void *)item) != NULL) {
continue;
}
if (_Py_hashtable_set(consts_index, (void *)item,
(void *)(uintptr_t)i) < 0) {
_Py_hashtable_destroy(consts_index);
PyErr_NoMemory();
return ERROR;
}
}
int ret = optimize_cfg(g, consts, const_cache, consts_index, firstlineno);
_Py_hashtable_destroy(consts_index);
RETURN_IF_ERROR(ret);
RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts));
RETURN_IF_ERROR(
add_checks_for_loads_of_uninitialized_variables(