gh-150027: Avoid copying during construction of frozenset objects (GH-150028)

This commit is contained in:
Peter Bierma 2026-05-19 13:57:37 -04:00 committed by GitHub
parent 29415c071f
commit 409fa8e1f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 116 additions and 22 deletions

View file

@ -18,8 +18,9 @@
#define INTRINSIC_TYPEVARTUPLE 9
#define INTRINSIC_SUBSCRIPT_GENERIC 10
#define INTRINSIC_TYPEALIAS 11
#define INTRINSIC_BUILD_FROZENSET 12
#define MAX_INTRINSIC_1 11
#define MAX_INTRINSIC_1 12
/* Binary Functions: */

View file

@ -80,7 +80,8 @@ extern "C" {
#define CONSTANT_TRUE 9
#define CONSTANT_FALSE 10
#define CONSTANT_MINUS_ONE 11
#define NUM_COMMON_CONSTANTS 12
#define CONSTANT_BUILTIN_FROZENSET 12
#define NUM_COMMON_CONSTANTS 13
/* Values used in the oparg for RESUME */
#define RESUME_AT_FUNC_START 0

View file

@ -35,6 +35,9 @@ extern void _PySet_ClearInternal(PySetObject *so);
PyAPI_FUNC(int) _PySet_AddTakeRef(PySetObject *so, PyObject *key);
PyObject *
_PySet_Freeze(PyObject *set);
#ifdef __cplusplus
}
#endif

View file

@ -44,7 +44,7 @@
builtins.set,
# Append-only — must match CONSTANT_* in
# Include/internal/pycore_opcode_utils.h.
None, "", True, False, -1]
None, "", True, False, -1, builtins.frozenset]
_nb_ops = _opcode.get_nb_ops()
hascompare = [opmap["COMPARE_OP"]]

View file

@ -268,7 +268,10 @@ def f_list():
def f_set():
return set(2*x for x in [1,2,3])
funcs = [f_all, f_any, f_tuple, f_list, f_set]
def f_frozenset():
return frozenset(2*x for x in [1,2,3])
funcs = [f_all, f_any, f_tuple, f_list, f_set, f_frozenset]
for f in funcs:
# check that generator code object is not duplicated
@ -278,35 +281,37 @@ def f_set():
# check that overriding the builtins works
global all, any, tuple, list, set
saved = all, any, tuple, list, set
global all, any, tuple, list, set, frozenset
saved = all, any, tuple, list, set, frozenset
try:
all = lambda x : "all"
any = lambda x : "any"
tuple = lambda x : "tuple"
list = lambda x : "list"
set = lambda x : "set"
frozenset = lambda x : "frozenset"
overridden_outputs = [f() for f in funcs]
finally:
all, any, tuple, list, set = saved
all, any, tuple, list, set, frozenset = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set', 'frozenset'])
# Now repeat, overriding the builtins module as well
saved = all, any, tuple, list, set
saved = all, any, tuple, list, set, frozenset
try:
builtins.all = all = lambda x : "all"
builtins.any = any = lambda x : "any"
builtins.tuple = tuple = lambda x : "tuple"
builtins.list = list = lambda x : "list"
builtins.set = set = lambda x : "set"
builtins.frozenset = frozenset = lambda x : "frozenset"
overridden_outputs = [f() for f in funcs]
finally:
all, any, tuple, list, set = saved
builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set = saved
all, any, tuple, list, set, frozenset = saved
builtins.all, builtins.any, builtins.tuple, builtins.list, builtins.set, builtins.frozenset = saved
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set'])
self.assertEqual(overridden_outputs, ['all', 'any', 'tuple', 'list', 'set', 'frozenset'])
def test_builtin_call_async_genexpr_no_crash(self):
async def f_all():

View file

@ -161,3 +161,34 @@ def test_syntax_error__return_not_in_function(self):
self.assertIsNone(cm.exception.text)
self.assertEqual(cm.exception.offset, 1)
self.assertEqual(cm.exception.end_offset, 10)
def test_frozenset_optimization(self):
# Check the instruction sequence generated for a frozenset() call whose
# only argument is a set display: a guarded fast path followed by the
# ordinary call as a fallback.
l1 = self.Label()
snippet = "frozenset({1, 2, 3})"
expected = [
('RESUME', 0),
('ANNOTATIONS_PLACEHOLDER', None),
('LOAD_NAME', 0),
# Guard: compare a copy of the loaded callable by identity against
# common constant 12 (CONSTANT_BUILTIN_FROZENSET); branch to l1 if it
# is something else.
('COPY', 1),
('LOAD_COMMON_CONSTANT', 12),
('IS_OP', 0),
('POP_JUMP_IF_FALSE', l1),
# Fast path: discard the callable, build the set, then hand it to
# unary intrinsic 12 (INTRINSIC_BUILD_FROZENSET).
('POP_TOP', None),
('LOAD_CONST', 1),
('LOAD_CONST', 2),
('LOAD_CONST', 3),
('BUILD_SET', 3),
('CALL_INTRINSIC_1', 12),
# The compiler emits this jump to its `end` label; presumably that
# lands after the fallback CALL below -- confirm against the caller.
('JUMP', 0),
l1,
# Fallback: a regular one-argument call of whatever the name resolved to.
('PUSH_NULL', None),
('LOAD_CONST', 1),
('LOAD_CONST', 2),
('LOAD_CONST', 3),
('BUILD_SET', 3),
('CALL', 1),
('POP_TOP', None),
('LOAD_CONST', 0),
('RETURN_VALUE', None)
]
self.codegen_test(snippet, expected)

View file

@ -0,0 +1,2 @@
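Improve performance of :class:`frozenset` objects by avoiding copies during
construction: when the builtin is called with a single set display, set
comprehension or generator expression, the elements are collected into a set
that is then converted to a :class:`frozenset` in place.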

View file

@ -1545,6 +1545,16 @@ set_swap_bodies(PySetObject *a, PySetObject *b)
FT_ATOMIC_STORE_PTR_RELEASE(b->table, b_table);
}
/* Convert an exact set into a frozenset in place, without copying its
   elements, by changing the object's type to PyFrozenSet_Type.

   Preconditions (checked by assertions only): `set` is non-NULL, is an
   exact set rather than a subclass, and is uniquely referenced.  The
   uniqueness requirement is presumably what keeps the retyping from being
   observable through another reference -- confirm with callers.

   Returns a new strong reference to the same object; the caller's own
   reference to `set` is not consumed. */
PyObject *
_PySet_Freeze(PyObject *set)
{
    assert(set != NULL);
    assert(PySet_CheckExact(set));
    assert(_PyObject_IsUniquelyReferenced(set));
    // Use the accessor: the C API asks that ob_type not be written directly.
    Py_SET_TYPE(set, &PyFrozenSet_Type);
    return Py_NewRef(set);
}
/*[clinic input]
@critical_section
set.copy

View file

@ -3953,22 +3953,45 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
if (! (func->kind == Name_kind &&
asdl_seq_LEN(args) == 1 &&
asdl_seq_LEN(kwds) == 0 &&
asdl_seq_GET(args, 0)->kind == GeneratorExp_kind))
asdl_seq_LEN(kwds) == 0))
{
return 0;
}
expr_ty generator_exp = asdl_seq_GET(args, 0);
PySTEntryObject *generator_entry = _PySymtable_Lookup(SYMTABLE(c), (void *)generator_exp);
location loc = LOC(func);
expr_ty arg_expr = asdl_seq_GET(args, 0);
if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "frozenset")
&& (arg_expr->kind == Set_kind || arg_expr->kind == SetComp_kind)) {
NEW_JUMP_TARGET_LABEL(c, skip_optimization);
ADDOP_I(c, loc, COPY, 1);
ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, CONSTANT_BUILTIN_FROZENSET);
ADDOP_COMPARE(c, loc, Is);
ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, skip_optimization);
ADDOP(c, loc, POP_TOP);
VISIT(c, expr, arg_expr);
ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_BUILD_FROZENSET);
ADDOP_JUMP(c, loc, JUMP, end);
USE_LABEL(c, skip_optimization);
return 1;
}
if (arg_expr->kind != GeneratorExp_kind) {
return 0;
}
PySTEntryObject *generator_entry = _PySymtable_Lookup(SYMTABLE(c), (void *)arg_expr);
if (generator_entry->ste_coroutine) {
Py_DECREF(generator_entry);
return 0;
}
Py_DECREF(generator_entry);
location loc = LOC(func);
int optimized = 0;
NEW_JUMP_TARGET_LABEL(c, skip_optimization);
@ -3994,6 +4017,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "set")) {
const_oparg = CONSTANT_BUILTIN_SET;
}
else if (_PyUnicode_EqualToASCIIString(func->v.Name.id, "frozenset")) {
const_oparg = CONSTANT_BUILTIN_FROZENSET;
}
if (const_oparg != -1) {
ADDOP_I(c, loc, COPY, 1); // the function
ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, const_oparg);
@ -4003,10 +4029,10 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
ADDOP_I(c, loc, BUILD_LIST, 0);
} else if (const_oparg == CONSTANT_BUILTIN_SET) {
} else if (const_oparg == CONSTANT_BUILTIN_SET || const_oparg == CONSTANT_BUILTIN_FROZENSET) {
ADDOP_I(c, loc, BUILD_SET, 0);
}
VISIT(c, expr, generator_exp);
VISIT(c, expr, arg_expr);
NEW_JUMP_TARGET_LABEL(c, loop);
NEW_JUMP_TARGET_LABEL(c, cleanup);
@ -4017,7 +4043,7 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
if (const_oparg == CONSTANT_BUILTIN_TUPLE || const_oparg == CONSTANT_BUILTIN_LIST) {
ADDOP_I(c, loc, LIST_APPEND, 3);
ADDOP_JUMP(c, loc, JUMP, loop);
} else if (const_oparg == CONSTANT_BUILTIN_SET) {
} else if (const_oparg == CONSTANT_BUILTIN_SET || const_oparg == CONSTANT_BUILTIN_FROZENSET) {
ADDOP_I(c, loc, SET_ADD, 3);
ADDOP_JUMP(c, loc, JUMP, loop);
}
@ -4029,7 +4055,8 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
ADDOP(c, NO_LOCATION, POP_ITER);
if (const_oparg != CONSTANT_BUILTIN_TUPLE &&
const_oparg != CONSTANT_BUILTIN_LIST &&
const_oparg != CONSTANT_BUILTIN_SET) {
const_oparg != CONSTANT_BUILTIN_SET &&
const_oparg != CONSTANT_BUILTIN_FROZENSET) {
ADDOP_LOAD_CONST(c, loc, initial_res == Py_True ? Py_False : Py_True);
}
ADDOP_JUMP(c, loc, JUMP, end);
@ -4044,6 +4071,9 @@ maybe_optimize_function_call(compiler *c, expr_ty e, jump_target_label end)
} else if (const_oparg == CONSTANT_BUILTIN_SET) {
// result is already a set
}
else if (const_oparg == CONSTANT_BUILTIN_FROZENSET) {
ADDOP_I(c, loc, CALL_INTRINSIC_1, INTRINSIC_BUILD_FROZENSET);
}
else {
ADDOP_LOAD_CONST(c, loc, initial_res);
}

View file

@ -9,6 +9,7 @@
#include "pycore_intrinsics.h" // INTRINSIC_PRINT
#include "pycore_list.h" // _PyList_AsTupleAndClear()
#include "pycore_object.h" // _PyObject_IsUniquelyReferenced()
#include "pycore_setobject.h" // _PySet_Freeze()
#include "pycore_pyerrors.h" // _PyErr_SetString()
#include "pycore_runtime.h" // _Py_ID()
#include "pycore_typevarobject.h" // _Py_make_typevar()
@ -207,6 +208,14 @@ make_typevar(PyThreadState* Py_UNUSED(ignored), PyObject *v)
return _Py_make_typevar(v, NULL, NULL);
}
/* Unary intrinsic that turns its operand, an exact and uniquely referenced
   set, into a frozenset by delegating to _PySet_Freeze().  The thread state
   argument is unused.  Returns whatever _PySet_Freeze() returns. */
static PyObject *
make_frozenset(PyThreadState* Py_UNUSED(ignored), PyObject *set)
{
    // Same preconditions that _PySet_Freeze() asserts; checked here as well.
    assert(PySet_CheckExact(set));
    assert(_PyObject_IsUniquelyReferenced(set));

    PyObject *frozen = _PySet_Freeze(set);
    return frozen;
}
#define INTRINSIC_FUNC_ENTRY(N, F) \
[N] = {F, #N},
@ -225,6 +234,7 @@ _PyIntrinsics_UnaryFunctions[] = {
INTRINSIC_FUNC_ENTRY(INTRINSIC_TYPEVARTUPLE, _Py_make_typevartuple)
INTRINSIC_FUNC_ENTRY(INTRINSIC_SUBSCRIPT_GENERIC, _Py_subscript_generic)
INTRINSIC_FUNC_ENTRY(INTRINSIC_TYPEALIAS, _Py_make_typealias)
INTRINSIC_FUNC_ENTRY(INTRINSIC_BUILD_FROZENSET, make_frozenset)
};

View file

@ -892,6 +892,7 @@ pycore_init_builtins(PyThreadState *tstate)
interp->common_consts[CONSTANT_FALSE] = Py_False;
interp->common_consts[CONSTANT_MINUS_ONE] =
(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS - 1];
interp->common_consts[CONSTANT_BUILTIN_FROZENSET] = (PyObject *)&PyFrozenSet_Type;
for (int i = 0; i < NUM_COMMON_CONSTANTS; i++) {
assert(interp->common_consts[i] != NULL);
}