mirror of
https://github.com/python/cpython.git
synced 2025-11-01 22:21:35 +00:00
gh-132042: Remove resolve_slotdups() to speedup class creation (#132156)
Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: sobolevn <mail@sobolevn.me> Co-authored-by: Kumar Aditya <kumaraditya@python.org>
This commit is contained in:
parent
aa99a7c70c
commit
e6e376a760
6 changed files with 95 additions and 61 deletions
|
|
@ -672,11 +672,6 @@ struct _Py_interp_cached_objects {
|
|||
|
||||
/* object.__reduce__ */
|
||||
PyObject *objreduce;
|
||||
#ifndef Py_GIL_DISABLED
|
||||
/* resolve_slotdups() */
|
||||
PyObject *type_slots_pname;
|
||||
pytype_slotdef *type_slots_ptrs[MAX_EQUIV];
|
||||
#endif
|
||||
|
||||
/* TypeVar and related types */
|
||||
PyTypeObject *generic_type;
|
||||
|
|
|
|||
|
|
@ -152,6 +152,9 @@ typedef int (*_py_validate_type)(PyTypeObject *);
|
|||
extern int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version);
|
||||
extern int _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor, uint32_t tp_version);
|
||||
|
||||
// Precalculates, for each slotdef, how many slotdefs share its name and stores the counts in slotdefs_name_counts.
|
||||
extern int _PyType_InitSlotDefs(PyInterpreterState *interp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,2 @@
|
|||
Improve class creation times by up to 12% by pre-computing type slots
|
||||
just once. Patch by Sergey Miryanov.
|
||||
|
|
@ -11422,6 +11422,11 @@ static pytype_slotdef slotdefs[] = {
|
|||
{NULL}
|
||||
};
|
||||
|
||||
/* Stores, for each slotdefs entry, the number of slotdefs entries that share its name.
|
||||
This counter is precalculated by _PyType_InitSlotDefs() when the main
|
||||
interpreter starts. */
|
||||
static uint8_t slotdefs_name_counts[Py_ARRAY_LENGTH(slotdefs)];
|
||||
|
||||
/* Given a type pointer and an offset gotten from a slotdef entry, return a
|
||||
pointer to the actual slot. This is not quite the same as simply adding
|
||||
the offset to the type pointer, since it takes care to indirect through the
|
||||
|
|
@ -11464,61 +11469,6 @@ slotptr(PyTypeObject *type, int ioffset)
|
|||
return (void **)ptr;
|
||||
}
|
||||
|
||||
/* Return a slot pointer for a given name, but ONLY if the attribute has
|
||||
exactly one slot function. The name must be an interned string. */
|
||||
static void **
|
||||
resolve_slotdups(PyTypeObject *type, PyObject *name)
|
||||
{
|
||||
/* XXX Maybe this could be optimized more -- but is it worth it? */
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
pytype_slotdef *ptrs[MAX_EQUIV];
|
||||
pytype_slotdef **pp = ptrs;
|
||||
/* Collect all slotdefs that match name into ptrs. */
|
||||
for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) {
|
||||
if (p->name_strobj == name)
|
||||
*pp++ = p;
|
||||
}
|
||||
*pp = NULL;
|
||||
#else
|
||||
/* pname and ptrs act as a little cache */
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
#define pname _Py_INTERP_CACHED_OBJECT(interp, type_slots_pname)
|
||||
#define ptrs _Py_INTERP_CACHED_OBJECT(interp, type_slots_ptrs)
|
||||
pytype_slotdef *p, **pp;
|
||||
|
||||
if (pname != name) {
|
||||
/* Collect all slotdefs that match name into ptrs. */
|
||||
pname = name;
|
||||
pp = ptrs;
|
||||
for (p = slotdefs; p->name_strobj; p++) {
|
||||
if (p->name_strobj == name)
|
||||
*pp++ = p;
|
||||
}
|
||||
*pp = NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Look in all slots of the type matching the name. If exactly one of these
|
||||
has a filled-in slot, return a pointer to that slot.
|
||||
Otherwise, return NULL. */
|
||||
void **res, **ptr;
|
||||
res = NULL;
|
||||
for (pp = ptrs; *pp; pp++) {
|
||||
ptr = slotptr(type, (*pp)->offset);
|
||||
if (ptr == NULL || *ptr == NULL)
|
||||
continue;
|
||||
if (res != NULL)
|
||||
return NULL;
|
||||
res = ptr;
|
||||
}
|
||||
#ifndef Py_GIL_DISABLED
|
||||
#undef pname
|
||||
#undef ptrs
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
// Return true if "name" corresponds to at least one slot definition. This is
|
||||
// a more accurate but more expensive test compared to is_dunder_name().
|
||||
static bool
|
||||
|
|
@ -11645,7 +11595,15 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p, pytype_slotdef **next_p,
|
|||
}
|
||||
if (Py_IS_TYPE(descr, &PyWrapperDescr_Type) &&
|
||||
((PyWrapperDescrObject *)descr)->d_base->name_strobj == p->name_strobj) {
|
||||
void **tptr = resolve_slotdups(type, p->name_strobj);
|
||||
void **tptr;
|
||||
/* p points into slotdefs, so the pointer difference is already the element
   index; dividing it by sizeof(slotdefs[0]) would select the wrong entry. */
size_t index = (size_t)(p - slotdefs);
|
||||
if (slotdefs_name_counts[index] == 1) {
|
||||
tptr = slotptr(type, p->offset);
|
||||
}
|
||||
else {
|
||||
tptr = NULL;
|
||||
}
|
||||
|
||||
if (tptr == NULL || tptr == ptr)
|
||||
generic = p->function;
|
||||
d = (PyWrapperDescrObject *)descr;
|
||||
|
|
@ -11858,6 +11816,76 @@ update_all_slots(PyTypeObject* type)
|
|||
|
||||
#endif
|
||||
|
||||
int
|
||||
_PyType_InitSlotDefs(PyInterpreterState *interp)
|
||||
{
|
||||
if (!_Py_IsMainInterpreter(interp)) {
|
||||
return 0;
|
||||
}
|
||||
PyObject *bytearray = NULL;
|
||||
PyObject *cache = PyDict_New();
|
||||
if (!cache) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
pytype_slotdef *p;
|
||||
Py_ssize_t idx = 0;
|
||||
for (p = slotdefs; p->name_strobj; p++, idx++) {
|
||||
assert(idx < 255);
|
||||
|
||||
if (PyDict_GetItemRef(cache, p->name_strobj, &bytearray) < 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (!bytearray) {
|
||||
Py_ssize_t size = sizeof(uint8_t) * (1 + MAX_EQUIV);
|
||||
bytearray = PyByteArray_FromStringAndSize(NULL, size);
|
||||
if (!bytearray) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
uint8_t *data = (uint8_t *)PyByteArray_AS_STRING(bytearray);
|
||||
data[0] = 0;
|
||||
|
||||
if (PyDict_SetItem(cache, p->name_strobj, bytearray) < 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
assert(PyByteArray_CheckExact(bytearray));
|
||||
uint8_t *data = (uint8_t *)PyByteArray_AS_STRING(bytearray);
|
||||
|
||||
data[0] += 1;
|
||||
assert(data[0] < MAX_EQUIV);
|
||||
|
||||
data[data[0]] = (uint8_t)idx;
|
||||
|
||||
Py_CLEAR(bytearray);
|
||||
}
|
||||
|
||||
memset(slotdefs_name_counts, 0, sizeof(slotdefs_name_counts));
|
||||
|
||||
Py_ssize_t pos = 0;
|
||||
PyObject *key = NULL;
|
||||
PyObject *value = NULL;
|
||||
while (PyDict_Next(cache, &pos, &key, &value)) {
|
||||
uint8_t *data = (uint8_t *)PyByteArray_AS_STRING(value);
|
||||
uint8_t n = data[0];
|
||||
for (uint8_t i = 0; i < n; i++) {
|
||||
uint8_t idx = data[i + 1];
|
||||
slotdefs_name_counts[idx] = n;
|
||||
}
|
||||
}
|
||||
|
||||
Py_DECREF(cache);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
Py_XDECREF(bytearray);
|
||||
Py_DECREF(cache);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
PyObject *
|
||||
_PyType_GetSlotWrapperNames(void)
|
||||
|
|
|
|||
|
|
@ -836,6 +836,10 @@ pycore_init_builtins(PyThreadState *tstate)
|
|||
}
|
||||
interp->callable_cache.object__getattribute__ = object__getattribute__;
|
||||
|
||||
if (_PyType_InitSlotDefs(interp) < 0) {
|
||||
return _PyStatus_ERR("failed to init slotdefs");
|
||||
}
|
||||
|
||||
if (_PyBuiltins_AddExceptions(bimod) < 0) {
|
||||
return _PyStatus_ERR("failed to add exceptions to builtins");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -344,6 +344,8 @@ Objects/obmalloc.c - obmalloc_state_main -
|
|||
Objects/obmalloc.c - obmalloc_state_initialized -
|
||||
Objects/typeobject.c - name_op -
|
||||
Objects/typeobject.c - slotdefs -
|
||||
# It is initialized only once, when the main interpreter starts
|
||||
Objects/typeobject.c - slotdefs_name_counts -
|
||||
Objects/unicodeobject.c - stripfuncnames -
|
||||
Objects/unicodeobject.c - utf7_category -
|
||||
Objects/unicodeobject.c unicode_decode_call_errorhandler_wchar argparse -
|
||||
|
|
|
|||
|
Can't render this file because it has a wrong number of fields in line 4.
|
Loading…
Add table
Add a link
Reference in a new issue