gh-139103: Improve namedtuple scaling in free-threaded build (gh-144332)

Add `_Py_type_getattro_stackref`, a variant of type attribute lookup
that returns `_PyStackRef` instead of `PyObject*`. This allows returning
deferred references in the free-threaded build, reducing reference count
contention when accessing type attributes.

This significantly improves scaling of namedtuple instantiation across
multiple threads.

* Add blurb

* Rename PyObject_GetAttrStackRef to _PyObject_GetAttrStackRef

* Apply suggestion from @vstinner

Co-authored-by: Victor Stinner <vstinner@python.org>

* Apply suggestion from @vstinner

Co-authored-by: Victor Stinner <vstinner@python.org>

* format

* Update Include/internal/pycore_function.h

Co-authored-by: Victor Stinner <vstinner@python.org>

---------

Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
Sam Gross 2026-02-06 09:43:05 -05:00 committed by GitHub
parent 638d22c6e7
commit d891b2bbd1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 254 additions and 106 deletions

View file

@ -47,6 +47,12 @@ static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) {
#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func))
/* Get the callable wrapped by a staticmethod.
Returns a borrowed reference.
The caller must ensure 'sm' is a staticmethod object. */
extern PyObject *_PyStaticMethod_GetFunc(PyObject *sm);
#ifdef __cplusplus
}
#endif

View file

@ -898,6 +898,10 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef
PyAPI_FUNC(int) _PyObject_GetMethodStackRef(PyThreadState *ts, PyObject *obj,
PyObject *name, _PyStackRef *method);
// Like PyObject_GetAttr but returns a _PyStackRef. For types, this can
// return a deferred reference to reduce reference count contention.
PyAPI_FUNC(_PyStackRef) _PyObject_GetAttrStackRef(PyObject *obj, PyObject *name);
// Cache the provided init method in the specialization cache of type if the
// provided type version matches the current version of the type.
//

View file

@ -10,6 +10,7 @@ extern "C" {
#include "pycore_interp_structs.h" // managed_static_type_state
#include "pycore_moduleobject.h" // PyModuleObject
#include "pycore_structs.h" // _PyStackRef
/* state */
@ -112,6 +113,8 @@ _PyType_IsReady(PyTypeObject *type)
extern PyObject* _Py_type_getattro_impl(PyTypeObject *type, PyObject *name,
int *suppress_missing_attribute);
extern PyObject* _Py_type_getattro(PyObject *type, PyObject *name);
extern _PyStackRef _Py_type_getattro_stackref(PyTypeObject *type, PyObject *name,
int *suppress_missing_attribute);
extern PyObject* _Py_BaseObject_RichCompare(PyObject* self, PyObject* other, int op);

View file

@ -0,0 +1,2 @@
Improve scaling of :func:`~collections.namedtuple` instantiation in the
free-threaded build.

View file

@ -7925,18 +7925,18 @@
}
else {
_PyFrame_SetStackPointer(frame, stack_pointer);
PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name);
attr = _PyObject_GetAttrStackRef(PyStackRef_AsPyObjectBorrow(owner), name);
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -1;
stack_pointer[-1] = attr;
stack_pointer += (oparg&1);
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(owner);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (attr_o == NULL) {
if (PyStackRef_IsNull(attr)) {
JUMP_TO_LABEL(error);
}
attr = PyStackRef_FromPyObjectSteal(attr_o);
stack_pointer += 1;
stack_pointer += -(oparg&1);
}
}
stack_pointer[-1] = attr;

View file

@ -7,6 +7,7 @@
#include "pycore_long.h" // _PyLong_GetOne()
#include "pycore_modsupport.h" // _PyArg_NoKeywords()
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
#include "pycore_object_deferred.h" // _PyObject_SetDeferredRefcount()
#include "pycore_pyerrors.h" // _PyErr_Occurred()
#include "pycore_setobject.h" // _PySet_NextEntry()
#include "pycore_stats.h"
@ -1760,6 +1761,7 @@ sm_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (sm == NULL) {
return NULL;
}
_PyObject_SetDeferredRefcount((PyObject *)sm);
if (sm_set_callable(sm, callable) < 0) {
Py_DECREF(sm);
return NULL;
@ -1926,9 +1928,17 @@ PyStaticMethod_New(PyObject *callable)
if (sm == NULL) {
return NULL;
}
_PyObject_SetDeferredRefcount((PyObject *)sm);
if (sm_set_callable(sm, callable) < 0) {
Py_DECREF(sm);
return NULL;
}
return (PyObject *)sm;
}
/* Return the callable stored inside a staticmethod object.

   'self' is converted with _PyStaticMethod_CAST and its sm_callable field
   is handed back as-is: no new reference is created here, so the caller
   receives a borrowed reference.  The caller is responsible for passing
   an actual staticmethod object. */
PyObject *
_PyStaticMethod_GetFunc(PyObject *self)
{
    staticmethod *wrapper = _PyStaticMethod_CAST(self);

    return wrapper->sm_callable;
}

View file

@ -31,6 +31,7 @@
#include "pycore_tuple.h" // _PyTuple_DebugMallocStats()
#include "pycore_typeobject.h" // _PyBufferWrapper_Type
#include "pycore_typevarobject.h" // _PyTypeAlias_Type
#include "pycore_stackref.h" // PyStackRef_FromPyObjectSteal
#include "pycore_unionobject.h" // _PyUnion_Type
@ -1334,6 +1335,54 @@ PyObject_GetAttr(PyObject *v, PyObject *name)
return result;
}
/* Attribute lookup that hands back a _PyStackRef instead of a PyObject *.

   A 'name' that is not a str raises TypeError and yields PyStackRef_NULL.

   When the object's tp_getattro slot is _Py_type_getattro, the lookup is
   routed through _Py_type_getattro_stackref(), which can return a deferred
   reference and so reduces reference count contention.

   Otherwise the tp_getattro slot is called, then the legacy tp_getattr
   slot; an object with neither slot raises AttributeError.  A non-NULL
   result is converted with PyStackRef_FromPyObjectSteal().

   Returns PyStackRef_NULL on failure. */
_PyStackRef
_PyObject_GetAttrStackRef(PyObject *v, PyObject *name)
{
    PyTypeObject *tp = Py_TYPE(v);

    if (!PyUnicode_Check(name)) {
        PyErr_Format(PyExc_TypeError,
                     "attribute name must be string, not '%.200s'",
                     Py_TYPE(name)->tp_name);
        return PyStackRef_NULL;
    }

    /* Type objects: ask for a stack reference directly. */
    if (tp->tp_getattro == _Py_type_getattro) {
        _PyStackRef type_attr =
            _Py_type_getattro_stackref((PyTypeObject *)v, name, NULL);
        if (PyStackRef_IsNull(type_attr)) {
            _PyObject_SetAttributeErrorContext(v, name);
        }
        return type_attr;
    }

    /* Everything else: run the ordinary slots and wrap the result. */
    PyObject *attr = NULL;
    if (tp->tp_getattro != NULL) {
        attr = (*tp->tp_getattro)(v, name);
    }
    else if (tp->tp_getattr == NULL) {
        PyErr_Format(PyExc_AttributeError,
                     "'%.100s' object has no attribute '%U'",
                     tp->tp_name, name);
    }
    else {
        /* tp_getattr takes a C string; a failed conversion returns
           without touching the attribute error context. */
        const char *utf8_name = PyUnicode_AsUTF8(name);
        if (utf8_name == NULL) {
            return PyStackRef_NULL;
        }
        attr = (*tp->tp_getattr)(v, (char *)utf8_name);
    }

    if (attr != NULL) {
        return PyStackRef_FromPyObjectSteal(attr);
    }
    _PyObject_SetAttributeErrorContext(v, name);
    return PyStackRef_NULL;
}
int
PyObject_GetOptionalAttr(PyObject *v, PyObject *name, PyObject **result)
{

View file

@ -6375,93 +6375,13 @@ _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long
*/
PyObject *
_Py_type_getattro_impl(PyTypeObject *type, PyObject *name, int * suppress_missing_attribute)
_Py_type_getattro_impl(PyTypeObject *type, PyObject *name, int *suppress_missing_attribute)
{
PyTypeObject *metatype = Py_TYPE(type);
PyObject *meta_attribute, *attribute;
descrgetfunc meta_get;
PyObject* res;
if (!PyUnicode_Check(name)) {
PyErr_Format(PyExc_TypeError,
"attribute name must be string, not '%.200s'",
Py_TYPE(name)->tp_name);
_PyStackRef ref = _Py_type_getattro_stackref(type, name, suppress_missing_attribute);
if (PyStackRef_IsNull(ref)) {
return NULL;
}
/* Initialize this type (we'll assume the metatype is initialized) */
if (!_PyType_IsReady(type)) {
if (PyType_Ready(type) < 0)
return NULL;
}
/* No readable descriptor found yet */
meta_get = NULL;
/* Look for the attribute in the metatype */
meta_attribute = _PyType_LookupRef(metatype, name);
if (meta_attribute != NULL) {
meta_get = Py_TYPE(meta_attribute)->tp_descr_get;
if (meta_get != NULL && PyDescr_IsData(meta_attribute)) {
/* Data descriptors implement tp_descr_set to intercept
* writes. Assume the attribute is not overridden in
* type's tp_dict (and bases): call the descriptor now.
*/
res = meta_get(meta_attribute, (PyObject *)type,
(PyObject *)metatype);
Py_DECREF(meta_attribute);
return res;
}
}
/* No data descriptor found on metatype. Look in tp_dict of this
* type and its bases */
attribute = _PyType_LookupRef(type, name);
if (attribute != NULL) {
/* Implement descriptor functionality, if any */
descrgetfunc local_get = Py_TYPE(attribute)->tp_descr_get;
Py_XDECREF(meta_attribute);
if (local_get != NULL) {
/* NULL 2nd argument indicates the descriptor was
* found on the target object itself (or a base) */
res = local_get(attribute, (PyObject *)NULL,
(PyObject *)type);
Py_DECREF(attribute);
return res;
}
return attribute;
}
/* No attribute found in local __dict__ (or bases): use the
* descriptor from the metatype, if any */
if (meta_get != NULL) {
PyObject *res;
res = meta_get(meta_attribute, (PyObject *)type,
(PyObject *)metatype);
Py_DECREF(meta_attribute);
return res;
}
/* If an ordinary attribute was found on the metatype, return it now */
if (meta_attribute != NULL) {
return meta_attribute;
}
/* Give up */
if (suppress_missing_attribute == NULL) {
PyErr_Format(PyExc_AttributeError,
"type object '%.100s' has no attribute '%U'",
type->tp_name, name);
} else {
// signal the caller we have not set an PyExc_AttributeError and gave up
*suppress_missing_attribute = 1;
}
return NULL;
return PyStackRef_AsPyObjectSteal(ref);
}
/* This is similar to PyObject_GenericGetAttr(),
@ -6473,6 +6393,137 @@ _Py_type_getattro(PyObject *tp, PyObject *name)
return _Py_type_getattro_impl(type, name, NULL);
}
/* Like _Py_type_getattro but returns a _PyStackRef.
This can return a deferred reference in the free-threaded build
when the attribute is found without going through a descriptor.
Lookup order, as implemented below:
1. a data descriptor found on the metatype;
2. an attribute found on the type or its bases (its tp_descr_get, if any,
is called with a NULL instance; exact staticmethod objects are unwrapped
directly);
3. any remaining descriptor found on the metatype;
4. a plain attribute found on the metatype.
Returns PyStackRef_NULL from the early error exits. On the other failure
paths result_ref is returned without having been assigned (presumably still
PyStackRef_NULL from the push -- TODO confirm against
_PyThreadState_PushCStackRef).
suppress_missing_attribute (optional):
* NULL: do not suppress the exception
* Non-NULL pointer: suppress the PyExc_AttributeError and
set *suppress_missing_attribute to 1 to signal we are returning NULL while
having suppressed the exception (other exceptions are not suppressed)
*/
_PyStackRef
_Py_type_getattro_stackref(PyTypeObject *type, PyObject *name,
int *suppress_missing_attribute)
{
PyTypeObject *metatype = Py_TYPE(type);
/* tp_descr_get of the metatype attribute, if one is found below */
descrgetfunc meta_get = NULL;
/* The two checks below run before any C stack ref is pushed, so they
 * return directly instead of jumping to 'done'. */
if (!PyUnicode_Check(name)) {
PyErr_Format(PyExc_TypeError,
"attribute name must be string, not '%.200s'",
Py_TYPE(name)->tp_name);
return PyStackRef_NULL;
}
/* Initialize this type (we'll assume the metatype is initialized) */
if (!_PyType_IsReady(type)) {
if (PyType_Ready(type) < 0)
return PyStackRef_NULL;
}
/* Set up GC-visible stack refs */
_PyCStackRef result_ref, meta_attribute_ref, attribute_ref;
PyThreadState *tstate = _PyThreadState_GET();
_PyThreadState_PushCStackRef(tstate, &result_ref);
_PyThreadState_PushCStackRef(tstate, &meta_attribute_ref);
_PyThreadState_PushCStackRef(tstate, &attribute_ref);
/* Look for the attribute in the metatype */
_PyType_LookupStackRefAndVersion(metatype, name, &meta_attribute_ref.ref);
if (!PyStackRef_IsNull(meta_attribute_ref.ref)) {
PyObject *meta_attr_obj = PyStackRef_AsPyObjectBorrow(meta_attribute_ref.ref);
meta_get = Py_TYPE(meta_attr_obj)->tp_descr_get;
if (meta_get != NULL && PyDescr_IsData(meta_attr_obj)) {
/* Data descriptors implement tp_descr_set to intercept
* writes. Assume the attribute is not overridden in
* type's tp_dict (and bases): call the descriptor now.
*/
PyObject *res = meta_get(meta_attr_obj, (PyObject *)type,
(PyObject *)metatype);
/* If the descriptor call failed, result_ref is left unassigned. */
if (res != NULL) {
result_ref.ref = PyStackRef_FromPyObjectSteal(res);
}
goto done;
}
}
/* No data descriptor found on metatype. Look in tp_dict of this
* type and its bases */
_PyType_LookupStackRefAndVersion(type, name, &attribute_ref.ref);
if (!PyStackRef_IsNull(attribute_ref.ref)) {
/* Implement descriptor functionality, if any */
PyObject *attr_obj = PyStackRef_AsPyObjectBorrow(attribute_ref.ref);
descrgetfunc local_get = Py_TYPE(attr_obj)->tp_descr_get;
/* Release meta_attribute early since we found in local dict */
PyStackRef_CLEAR(meta_attribute_ref.ref);
if (local_get != NULL) {
/* Special case staticmethod to avoid descriptor call overhead.
* staticmethod.__get__ just returns the wrapped callable.
* Only exact staticmethod objects take this shortcut; a NULL
* callable falls through to the generic descriptor call. */
if (Py_TYPE(attr_obj) == &PyStaticMethod_Type) {
PyObject *callable = _PyStaticMethod_GetFunc(attr_obj);
if (callable) {
result_ref.ref = PyStackRef_FromPyObjectNew(callable);
goto done;
}
}
/* NULL 2nd argument indicates the descriptor was
* found on the target object itself (or a base) */
PyObject *res = local_get(attr_obj, (PyObject *)NULL,
(PyObject *)type);
if (res != NULL) {
result_ref.ref = PyStackRef_FromPyObjectSteal(res);
}
goto done;
}
/* No descriptor, return the attribute directly.
 * The looked-up ref is moved into result_ref and attribute_ref is reset
 * to NULL, so only one slot holds it when the refs are popped. */
result_ref.ref = attribute_ref.ref;
attribute_ref.ref = PyStackRef_NULL;
goto done;
}
/* No attribute found in local __dict__ (or bases): use the
* descriptor from the metatype, if any */
if (meta_get != NULL) {
PyObject *meta_attr_obj = PyStackRef_AsPyObjectBorrow(meta_attribute_ref.ref);
PyObject *res = meta_get(meta_attr_obj, (PyObject *)type,
(PyObject *)metatype);
if (res != NULL) {
result_ref.ref = PyStackRef_FromPyObjectSteal(res);
}
goto done;
}
/* If an ordinary attribute was found on the metatype, return it now.
 * As above, the ref is moved into result_ref rather than copied. */
if (!PyStackRef_IsNull(meta_attribute_ref.ref)) {
result_ref.ref = meta_attribute_ref.ref;
meta_attribute_ref.ref = PyStackRef_NULL;
goto done;
}
/* Give up */
if (suppress_missing_attribute == NULL) {
PyErr_Format(PyExc_AttributeError,
"type object '%.100s' has no attribute '%U'",
type->tp_name, name);
}
else {
// signal the caller we have not set a PyExc_AttributeError and gave up
*suppress_missing_attribute = 1;
}
/* Common exit: pop the C stack refs in the reverse order of the pushes
 * above. result_ref is popped last and its value is returned to the
 * caller. */
done:
_PyThreadState_PopCStackRef(tstate, &attribute_ref);
_PyThreadState_PopCStackRef(tstate, &meta_attribute_ref);
return _PyThreadState_PopCStackRefSteal(tstate, &result_ref);
}
// Called by type_setattro(). Updates both the type dict and
// the type versions.
static int
@ -10937,15 +10988,19 @@ static PyObject *
slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
PyThreadState *tstate = _PyThreadState_GET();
PyObject *func, *result;
PyObject *result;
func = PyObject_GetAttr((PyObject *)type, &_Py_ID(__new__));
if (func == NULL) {
_PyCStackRef func_ref;
_PyThreadState_PushCStackRef(tstate, &func_ref);
func_ref.ref = _PyObject_GetAttrStackRef((PyObject *)type, &_Py_ID(__new__));
if (PyStackRef_IsNull(func_ref.ref)) {
_PyThreadState_PopCStackRef(tstate, &func_ref);
return NULL;
}
PyObject *func = PyStackRef_AsPyObjectBorrow(func_ref.ref);
result = _PyObject_Call_Prepend(tstate, func, (PyObject *)type, args, kwds);
Py_DECREF(func);
_PyThreadState_PopCStackRef(tstate, &func_ref);
return result;
}

View file

@ -2392,10 +2392,9 @@ dummy_func(
}
else {
/* Classic, pushes one value. */
PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name);
attr = _PyObject_GetAttrStackRef(PyStackRef_AsPyObjectBorrow(owner), name);
PyStackRef_CLOSE(owner);
ERROR_IF(attr_o == NULL);
attr = PyStackRef_FromPyObjectSteal(attr_o);
ERROR_IF(PyStackRef_IsNull(attr));
}
}

View file

@ -8703,19 +8703,19 @@
stack_pointer += 1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name);
attr = _PyObject_GetAttrStackRef(PyStackRef_AsPyObjectBorrow(owner), name);
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -1;
stack_pointer[-1] = attr;
stack_pointer += (oparg&1);
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(owner);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (attr_o == NULL) {
if (PyStackRef_IsNull(attr)) {
SET_CURRENT_CACHED_VALUES(0);
JUMP_TO_ERROR();
}
attr = PyStackRef_FromPyObjectSteal(attr_o);
stack_pointer += 1;
stack_pointer += -(oparg&1);
}
_tos_cache0 = PyStackRef_ZERO_BITS;
_tos_cache1 = PyStackRef_ZERO_BITS;

View file

@ -7924,18 +7924,18 @@
}
else {
_PyFrame_SetStackPointer(frame, stack_pointer);
PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name);
attr = _PyObject_GetAttrStackRef(PyStackRef_AsPyObjectBorrow(owner), name);
stack_pointer = _PyFrame_GetStackPointer(frame);
stack_pointer += -1;
stack_pointer[-1] = attr;
stack_pointer += (oparg&1);
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
_PyFrame_SetStackPointer(frame, stack_pointer);
PyStackRef_CLOSE(owner);
stack_pointer = _PyFrame_GetStackPointer(frame);
if (attr_o == NULL) {
if (PyStackRef_IsNull(attr)) {
JUMP_TO_LABEL(error);
}
attr = PyStackRef_FromPyObjectSteal(attr_o);
stack_pointer += 1;
stack_pointer += -(oparg&1);
}
}
stack_pointer[-1] = attr;

View file

@ -28,8 +28,10 @@
import sys
import threading
import time
from collections import namedtuple
from dataclasses import dataclass
from operator import methodcaller
from typing import NamedTuple
# The iterations in individual benchmarks are scaled by this factor.
WORK_SCALE = 100
@ -215,6 +217,24 @@ def instantiate_dataclass():
for _ in range(1000 * WORK_SCALE):
obj = MyDataClass(x=1, y=2, z=3)
# Three-field namedtuple type built once at import time and reused by the
# benchmark below.
MyNamedTuple = namedtuple("MyNamedTuple", ["x", "y", "z"])

@register_benchmark
def instantiate_namedtuple():
    """Benchmark: repeatedly construct MyNamedTuple with keyword arguments."""
    iterations = 1000 * WORK_SCALE
    for _ in range(iterations):
        instance = MyNamedTuple(x=1, y=2, z=3)
# typing.NamedTuple counterpart of the collections.namedtuple benchmark type:
# three int-annotated fields declared with class syntax.
class MyTypingNamedTuple(NamedTuple):
    x: int
    y: int
    z: int

@register_benchmark
def instantiate_typing_namedtuple():
    """Benchmark: repeatedly construct MyTypingNamedTuple with keyword arguments."""
    iterations = 1000 * WORK_SCALE
    for _ in range(iterations):
        instance = MyTypingNamedTuple(x=1, y=2, z=3)
@register_benchmark
def deepcopy():