gh-100239: Specialize more binary operations using BINARY_OP_EXTEND (GH-128956)

This commit is contained in:
Pieter Eendebak 2026-04-16 10:22:41 +02:00 committed by GitHub
parent 9d38143088
commit 1f6a09fb36
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1850 additions and 1224 deletions

View file

@ -2,6 +2,7 @@
#include "opcode.h"
#include "pycore_bytesobject.h" // _PyBytes_Concat
#include "pycore_code.h"
#include "pycore_critical_section.h"
#include "pycore_descrobject.h" // _PyMethodWrapper_Type
@ -9,7 +10,8 @@
#include "pycore_function.h" // _PyFunction_GetVersionForCurrentState()
#include "pycore_interpframe.h" // FRAME_SPECIALS_SIZE
#include "pycore_lazyimportobject.h" // PyLazyImport_CheckExact
#include "pycore_list.h" // _PyListIterObject
#include "pycore_list.h" // _PyListIterObject, _PyList_Concat
#include "pycore_tuple.h" // _PyTuple_Concat
#include "pycore_long.h" // _PyLong_IsNonNegativeCompact()
#include "pycore_moduleobject.h"
#include "pycore_object.h"
@ -2120,6 +2122,56 @@ is_compactlong(PyObject *v)
_PyLong_IsCompact((PyLongObject *)v);
}
/* sequence * int helpers: bypass PyNumber_Multiply dispatch overhead
by calling sq_repeat directly with PyLong_AsSsize_t. */
static inline PyObject *
seq_int_multiply(PyObject *seq, PyObject *n,
ssizeargfunc repeat)
{
Py_ssize_t count = PyLong_AsSsize_t(n);
if (count == -1 && PyErr_Occurred()) {
return NULL;
}
return repeat(seq, count);
}
static PyObject *
str_int_multiply(PyObject *lhs, PyObject *rhs)
{
return seq_int_multiply(lhs, rhs, PyUnicode_Type.tp_as_sequence->sq_repeat);
}
static PyObject *
int_str_multiply(PyObject *lhs, PyObject *rhs)
{
return seq_int_multiply(rhs, lhs, PyUnicode_Type.tp_as_sequence->sq_repeat);
}
static PyObject *
bytes_int_multiply(PyObject *lhs, PyObject *rhs)
{
return seq_int_multiply(lhs, rhs, PyBytes_Type.tp_as_sequence->sq_repeat);
}
static PyObject *
int_bytes_multiply(PyObject *lhs, PyObject *rhs)
{
return seq_int_multiply(rhs, lhs, PyBytes_Type.tp_as_sequence->sq_repeat);
}
static PyObject *
tuple_int_multiply(PyObject *lhs, PyObject *rhs)
{
return seq_int_multiply(lhs, rhs, PyTuple_Type.tp_as_sequence->sq_repeat);
}
static PyObject *
int_tuple_multiply(PyObject *lhs, PyObject *rhs)
{
return seq_int_multiply(rhs, lhs, PyTuple_Type.tp_as_sequence->sq_repeat);
}
static int
compactlongs_guard(PyObject *lhs, PyObject *rhs)
{
@ -2210,25 +2262,63 @@ LONG_FLOAT_ACTION(compactlong_float_true_div, /)
#undef LONG_FLOAT_ACTION
static _PyBinaryOpSpecializationDescr binaryop_extend_descrs[] = {
/* long-long arithmetic */
{NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
{NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
{NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
{NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1},
{NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1},
{NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1},
/* long-long arithmetic: guards also check _PyLong_IsCompact, so
type alone is not sufficient to eliminate the guard. */
{NB_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
{NB_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
{NB_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
{NB_INPLACE_OR, compactlongs_guard, compactlongs_or, &PyLong_Type, 1, NULL, NULL},
{NB_INPLACE_AND, compactlongs_guard, compactlongs_and, &PyLong_Type, 1, NULL, NULL},
{NB_INPLACE_XOR, compactlongs_guard, compactlongs_xor, &PyLong_Type, 1, NULL, NULL},
/* float-long arithemetic */
{NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1},
{NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1},
{NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1},
{NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1},
/* float-long arithmetic: guards also check NaN and compactness. */
{NB_ADD, float_compactlong_guard, float_compactlong_add, &PyFloat_Type, 1, NULL, NULL},
{NB_SUBTRACT, float_compactlong_guard, float_compactlong_subtract, &PyFloat_Type, 1, NULL, NULL},
{NB_TRUE_DIVIDE, nonzero_float_compactlong_guard, float_compactlong_true_div, &PyFloat_Type, 1, NULL, NULL},
{NB_MULTIPLY, float_compactlong_guard, float_compactlong_multiply, &PyFloat_Type, 1, NULL, NULL},
/* float-float arithmetic */
{NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1},
{NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1},
{NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1},
{NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1},
/* long-float arithmetic: guards also check NaN and compactness. */
{NB_ADD, compactlong_float_guard, compactlong_float_add, &PyFloat_Type, 1, NULL, NULL},
{NB_SUBTRACT, compactlong_float_guard, compactlong_float_subtract, &PyFloat_Type, 1, NULL, NULL},
{NB_TRUE_DIVIDE, nonzero_compactlong_float_guard, compactlong_float_true_div, &PyFloat_Type, 1, NULL, NULL},
{NB_MULTIPLY, compactlong_float_guard, compactlong_float_multiply, &PyFloat_Type, 1, NULL, NULL},
/* list-list concatenation: _PyList_Concat always allocates a new list */
{NB_ADD, NULL, _PyList_Concat, &PyList_Type, 1, &PyList_Type, &PyList_Type},
/* tuple-tuple concatenation: _PyTuple_Concat has a zero-length shortcut
that can return one of the operands, so the result is not guaranteed
to be a freshly allocated object. */
{NB_ADD, NULL, _PyTuple_Concat, &PyTuple_Type, 0, &PyTuple_Type, &PyTuple_Type},
/* str * int / int * str: call unicode_repeat directly.
unicode_repeat returns the original when n == 1. */
{NB_MULTIPLY, NULL, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
{NB_MULTIPLY, NULL, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
{NB_INPLACE_MULTIPLY, NULL, str_int_multiply, &PyUnicode_Type, 0, &PyUnicode_Type, &PyLong_Type},
{NB_INPLACE_MULTIPLY, NULL, int_str_multiply, &PyUnicode_Type, 0, &PyLong_Type, &PyUnicode_Type},
/* bytes + bytes: bytes_concat may return an operand when one side
is empty, so result is not always unique. */
{NB_ADD, NULL, _PyBytes_Concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
{NB_INPLACE_ADD, NULL, _PyBytes_Concat, &PyBytes_Type, 0, &PyBytes_Type, &PyBytes_Type},
/* bytes * int / int * bytes: call bytes_repeat directly.
bytes_repeat returns the original when n == 1. */
{NB_MULTIPLY, NULL, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
{NB_MULTIPLY, NULL, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
{NB_INPLACE_MULTIPLY, NULL, bytes_int_multiply, &PyBytes_Type, 0, &PyBytes_Type, &PyLong_Type},
{NB_INPLACE_MULTIPLY, NULL, int_bytes_multiply, &PyBytes_Type, 0, &PyLong_Type, &PyBytes_Type},
/* tuple * int / int * tuple: call tuple_repeat directly.
tuple_repeat returns the original when n == 1. */
{NB_MULTIPLY, NULL, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
{NB_MULTIPLY, NULL, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
{NB_INPLACE_MULTIPLY, NULL, tuple_int_multiply, &PyTuple_Type, 0, &PyTuple_Type, &PyLong_Type},
{NB_INPLACE_MULTIPLY, NULL, int_tuple_multiply, &PyTuple_Type, 0, &PyLong_Type, &PyTuple_Type},
/* dict | dict */
{NB_OR, NULL, _PyDict_Or, &PyDict_Type, 1, &PyDict_Type, &PyDict_Type},
{NB_INPLACE_OR, NULL, _PyDict_IOr, &PyDict_Type, 0, &PyDict_Type, &PyDict_Type},
};
static int
@ -2238,7 +2328,13 @@ binary_op_extended_specialization(PyObject *lhs, PyObject *rhs, int oparg,
size_t n = sizeof(binaryop_extend_descrs)/sizeof(_PyBinaryOpSpecializationDescr);
for (size_t i = 0; i < n; i++) {
_PyBinaryOpSpecializationDescr *d = &binaryop_extend_descrs[i];
if (d->oparg == oparg && d->guard(lhs, rhs)) {
if (d->oparg != oparg) {
continue;
}
int match = (d->guard != NULL)
? d->guard(lhs, rhs)
: (Py_TYPE(lhs) == d->lhs_type && Py_TYPE(rhs) == d->rhs_type);
if (match) {
*descr = d;
return 1;
}