gh-132042: Remove resolve_slotdups() to speedup class creation (#132156)

Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: sobolevn <mail@sobolevn.me>
Co-authored-by: Kumar Aditya <kumaraditya@python.org>
This commit is contained in:
Sergey Miryanov 2025-10-03 14:58:00 +05:00 committed by GitHub
parent aa99a7c70c
commit e6e376a760
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 95 additions and 61 deletions

View file

@ -672,11 +672,6 @@ struct _Py_interp_cached_objects {
/* object.__reduce__ */ /* object.__reduce__ */
PyObject *objreduce; PyObject *objreduce;
#ifndef Py_GIL_DISABLED
/* resolve_slotdups() */
PyObject *type_slots_pname;
pytype_slotdef *type_slots_ptrs[MAX_EQUIV];
#endif
/* TypeVar and related types */ /* TypeVar and related types */
PyTypeObject *generic_type; PyTypeObject *generic_type;

View file

@ -152,6 +152,9 @@ typedef int (*_py_validate_type)(PyTypeObject *);
extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version); extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version);
extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version); extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version);
// Precalculates, for every slotdefs entry, how many entries share its name
// and stores the result in slotdefs_name_counts (main interpreter only).
extern int _PyType_InitSlotDefs(PyInterpreterState *interp);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -0,0 +1,2 @@
Improve class creation times by up to 12% by pre-computing type slots
just once. Patch by Sergey Miryanov.

View file

@ -11422,6 +11422,11 @@ static pytype_slotdef slotdefs[] = {
{NULL} {NULL}
}; };
/* For each entry of slotdefs (same index), stores the number of slotdefs
   entries that have the same name as that entry.
   These counters are precalculated by _PyType_InitSlotDefs() when the main
   interpreter starts. */
static uint8_t slotdefs_name_counts[Py_ARRAY_LENGTH(slotdefs)];
/* Given a type pointer and an offset gotten from a slotdef entry, return a /* Given a type pointer and an offset gotten from a slotdef entry, return a
pointer to the actual slot. This is not quite the same as simply adding pointer to the actual slot. This is not quite the same as simply adding
the offset to the type pointer, since it takes care to indirect through the the offset to the type pointer, since it takes care to indirect through the
@ -11464,61 +11469,6 @@ slotptr(PyTypeObject *type, int ioffset)
return (void **)ptr; return (void **)ptr;
} }
/* Return a slot pointer for a given name, but ONLY if the attribute has
exactly one slot function. The name must be an interned string. */
static void **
resolve_slotdups(PyTypeObject *type, PyObject *name)
{
/* XXX Maybe this could be optimized more -- but is it worth it? */
#ifdef Py_GIL_DISABLED
pytype_slotdef *ptrs[MAX_EQUIV];
pytype_slotdef **pp = ptrs;
/* Collect all slotdefs that match name into ptrs. */
for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) {
if (p->name_strobj == name)
*pp++ = p;
}
*pp = NULL;
#else
/* pname and ptrs act as a little cache */
PyInterpreterState *interp = _PyInterpreterState_GET();
#define pname _Py_INTERP_CACHED_OBJECT(interp, type_slots_pname)
#define ptrs _Py_INTERP_CACHED_OBJECT(interp, type_slots_ptrs)
pytype_slotdef *p, **pp;
if (pname != name) {
/* Collect all slotdefs that match name into ptrs. */
pname = name;
pp = ptrs;
for (p = slotdefs; p->name_strobj; p++) {
if (p->name_strobj == name)
*pp++ = p;
}
*pp = NULL;
}
#endif
/* Look in all slots of the type matching the name. If exactly one of these
has a filled-in slot, return a pointer to that slot.
Otherwise, return NULL. */
void **res, **ptr;
res = NULL;
for (pp = ptrs; *pp; pp++) {
ptr = slotptr(type, (*pp)->offset);
if (ptr == NULL || *ptr == NULL)
continue;
if (res != NULL)
return NULL;
res = ptr;
}
#ifndef Py_GIL_DISABLED
#undef pname
#undef ptrs
#endif
return res;
}
// Return true if "name" corresponds to at least one slot definition. This is // Return true if "name" corresponds to at least one slot definition. This is
// a more accurate but more expensive test compared to is_dunder_name(). // a more accurate but more expensive test compared to is_dunder_name().
static bool static bool
@ -11645,7 +11595,15 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p, pytype_slotdef **next_p,
} }
if (Py_IS_TYPE(descr, &PyWrapperDescr_Type) && if (Py_IS_TYPE(descr, &PyWrapperDescr_Type) &&
((PyWrapperDescrObject *)descr)->d_base->name_strobj == p->name_strobj) { ((PyWrapperDescrObject *)descr)->d_base->name_strobj == p->name_strobj) {
void **tptr = resolve_slotdups(type, p->name_strobj); void **tptr;
/* p points into slotdefs[], so the pointer difference is already the
   element index.  It must NOT be divided by sizeof(slotdefs[0]): that
   would collapse almost every index to a tiny value and read the name
   count of an unrelated slot. */
size_t index = (size_t)(p - slotdefs);
/* Only when this slotdef is the sole entry with its name can the wrapper
   be mapped back to a single slot; with several same-named entries the
   slot pointer is left NULL. */
if (slotdefs_name_counts[index] == 1) {
    tptr = slotptr(type, p->offset);
}
else {
    tptr = NULL;
}
if (tptr == NULL || tptr == ptr) if (tptr == NULL || tptr == ptr)
generic = p->function; generic = p->function;
d = (PyWrapperDescrObject *)descr; d = (PyWrapperDescrObject *)descr;
@ -11858,6 +11816,76 @@ update_all_slots(PyTypeObject* type)
#endif #endif
/* Fill slotdefs_name_counts: for every entry of slotdefs, record how many
   entries of slotdefs carry the same name.

   Does nothing (and returns 0) unless `interp` is the main interpreter.

   Implementation: a temporary dict maps each slot name to a bytearray of
   1 + MAX_EQUIV bytes, where byte 0 is the number of entries seen so far
   with that name and bytes 1..count are the slotdefs indices of those
   entries.  Once all entries are grouped, each group's size is written to
   slotdefs_name_counts at every index in the group.

   Returns 0 on success, -1 if creating or updating the temporary objects
   failed. */
int
_PyType_InitSlotDefs(PyInterpreterState *interp)
{
    int status = -1;
    PyObject *group = NULL;        /* strong ref to the current bytearray */
    PyObject *groups_by_name;
    Py_ssize_t slot_index;
    Py_ssize_t iter_pos = 0;
    PyObject *name = NULL;         /* borrowed, set by PyDict_Next() */
    PyObject *members = NULL;      /* borrowed, set by PyDict_Next() */

    if (!_Py_IsMainInterpreter(interp)) {
        return 0;
    }

    groups_by_name = PyDict_New();
    if (groups_by_name == NULL) {
        return -1;
    }

    /* Pass 1: group the slotdefs indices by slot name. */
    for (slot_index = 0;
         slotdefs[slot_index].name_strobj;
         slot_index++)
    {
        PyObject *slot_name = slotdefs[slot_index].name_strobj;
        uint8_t *buf;

        /* Indices are stored as single bytes below. */
        assert(slot_index < 255);

        if (PyDict_GetItemRef(groups_by_name, slot_name, &group) < 0) {
            goto done;
        }

        if (group == NULL) {
            /* First entry with this name: create an empty group. */
            group = PyByteArray_FromStringAndSize(
                NULL, (Py_ssize_t)(sizeof(uint8_t) * (1 + MAX_EQUIV)));
            if (group == NULL) {
                goto done;
            }
            ((uint8_t *)PyByteArray_AS_STRING(group))[0] = 0;

            if (PyDict_SetItem(groups_by_name, slot_name, group) < 0) {
                goto done;
            }
        }

        assert(PyByteArray_CheckExact(group));
        buf = (uint8_t *)PyByteArray_AS_STRING(group);

        /* Append this index to the group. */
        buf[0] += 1;
        assert(buf[0] < MAX_EQUIV);
        buf[buf[0]] = (uint8_t)slot_index;

        Py_CLEAR(group);
    }

    /* Pass 2: publish each group's size at every index it contains. */
    memset(slotdefs_name_counts, 0, sizeof(slotdefs_name_counts));

    while (PyDict_Next(groups_by_name, &iter_pos, &name, &members)) {
        uint8_t *buf = (uint8_t *)PyByteArray_AS_STRING(members);
        uint8_t count = buf[0];

        for (int k = 1; k <= count; k++) {
            slotdefs_name_counts[buf[k]] = count;
        }
    }

    status = 0;

done:
    /* `group` is NULL here unless a failure interrupted pass 1. */
    Py_XDECREF(group);
    Py_DECREF(groups_by_name);
    return status;
}
PyObject * PyObject *
_PyType_GetSlotWrapperNames(void) _PyType_GetSlotWrapperNames(void)

View file

@ -836,6 +836,10 @@ pycore_init_builtins(PyThreadState *tstate)
} }
interp->callable_cache.object__getattribute__ = object__getattribute__; interp->callable_cache.object__getattribute__ = object__getattribute__;
if (_PyType_InitSlotDefs(interp) < 0) {
return _PyStatus_ERR("failed to init slotdefs");
}
if (_PyBuiltins_AddExceptions(bimod) < 0) { if (_PyBuiltins_AddExceptions(bimod) < 0) {
return _PyStatus_ERR("failed to add exceptions to builtins"); return _PyStatus_ERR("failed to add exceptions to builtins");
} }

View file

@ -344,6 +344,8 @@ Objects/obmalloc.c - obmalloc_state_main -
Objects/obmalloc.c - obmalloc_state_initialized - Objects/obmalloc.c - obmalloc_state_initialized -
Objects/typeobject.c - name_op - Objects/typeobject.c - name_op -
Objects/typeobject.c - slotdefs - Objects/typeobject.c - slotdefs -
# It is initialized only once, when the main interpreter starts
Objects/typeobject.c - slotdefs_name_counts -
Objects/unicodeobject.c - stripfuncnames - Objects/unicodeobject.c - stripfuncnames -
Objects/unicodeobject.c - utf7_category - Objects/unicodeobject.c - utf7_category -
Objects/unicodeobject.c unicode_decode_call_errorhandler_wchar argparse - Objects/unicodeobject.c unicode_decode_call_errorhandler_wchar argparse -

Can't render this file because it has a wrong number of fields in line 4.