[3.14] gh-139103: fix free-threading dataclass.__init__ perf issue (gh-141596) (gh-141750)

A dataclass's `__init__` function is generated dynamically by a call to `exec()` and so doesn't have deferred reference counting enabled. Enable deferred reference counting on functions when they are assigned as attributes of type objects, to avoid reference count contention when creating dataclass instances.
(cherry picked from commit ce79154176)

Co-authored-by: Edward Xu <xuxiangad@gmail.com>
This commit is contained in:
Sam Gross 2025-11-19 10:00:51 -05:00 committed by GitHub
parent 8c796889ff
commit b7c25eabd6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 25 additions and 0 deletions

View file

@ -0,0 +1 @@
Improve multithreaded scaling of dataclasses on the free-threaded build.

View file

@ -6181,6 +6181,18 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_INLINE_VALUES)); assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_INLINE_VALUES));
assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_MANAGED_DICT)); assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_MANAGED_DICT));
#ifdef Py_GIL_DISABLED
// gh-139103: Enable deferred refcounting for functions assigned
// to type objects. This is important for `dataclass.__init__`,
// which is generated dynamically.
if (value != NULL &&
PyFunction_Check(value) &&
!_PyObject_HasDeferredRefcount(value))
{
PyUnstable_Object_EnableDeferredRefcount(value);
}
#endif
PyObject *old_value = NULL; PyObject *old_value = NULL;
PyObject *descr = _PyType_LookupRef(metatype, name); PyObject *descr = _PyType_LookupRef(metatype, name);
if (descr != NULL) { if (descr != NULL) {

View file

@ -27,6 +27,7 @@
import sys import sys
import threading import threading
import time import time
from dataclasses import dataclass
# The iterations in individual benchmarks are scaled by this factor. # The iterations in individual benchmarks are scaled by this factor.
WORK_SCALE = 100 WORK_SCALE = 100
@ -189,6 +190,17 @@ def thread_local_read():
_ = tmp.x _ = tmp.x
# Small three-field dataclass used as the instantiation target for the
# dataclass benchmark. Its __init__ is generated by the @dataclass decorator.
@dataclass
class MyDataClass:
    x: int
    y: int
    z: int
@register_benchmark
def instantiate_dataclass():
    """Benchmark: repeatedly construct ``MyDataClass`` instances.

    The number of iterations is ``1000 * WORK_SCALE``. Each iteration
    calls the dataclass-generated ``__init__`` with keyword arguments.
    """
    iterations = 1000 * WORK_SCALE
    for _ in range(iterations):
        # The instance is bound to a local and discarded on the next pass.
        obj = MyDataClass(x=1, y=2, z=3)
def bench_one_thread(func): def bench_one_thread(func):
t0 = time.perf_counter_ns() t0 = time.perf_counter_ns()
func() func()