mirror of
https://github.com/python/cpython.git
synced 2026-01-28 18:22:17 +00:00
gh-144157: Optimize bytes.translate() by deferring change detection (GH-144158)
Optimize bytes.translate() by deferring change detection. Move the equality check out of the hot loop to allow better compiler optimization. Instead of checking each byte during translation, perform a single memcmp at the end to determine whether the input can be returned unchanged. This allows compilers to unroll and pipeline the loops, resulting in a ~2x throughput improvement for medium-to-large inputs (tested on an AMD Zen 2). No change was observed on small inputs. It will also be faster for bytes subclasses, as those do not need change detection.
This commit is contained in:
parent
77bf4ba732
commit
a966d94e76
2 changed files with 10 additions and 4 deletions
|
|
@ -0,0 +1,2 @@
|
|||
:meth:`bytes.translate` now allows the compiler to unroll its loop more
|
||||
usefully for a 2x speedup in the common case where no deletions are specified.
|
||||
|
|
@ -2237,11 +2237,15 @@ bytes_translate_impl(PyBytesObject *self, PyObject *table,
|
|||
/* If no deletions are required, use faster code */
|
||||
for (i = inlen; --i >= 0; ) {
|
||||
c = Py_CHARMASK(*input++);
|
||||
if (Py_CHARMASK((*output++ = table_chars[c])) != c)
|
||||
changed = 1;
|
||||
*output++ = table_chars[c];
|
||||
}
|
||||
if (!changed && PyBytes_CheckExact(input_obj)) {
|
||||
Py_SETREF(result, Py_NewRef(input_obj));
|
||||
/* Check if anything changed (for returning original object) */
|
||||
/* We save this check until the end so that the compiler will */
|
||||
/* unroll the loop above leading to MUCH faster code. */
|
||||
if (PyBytes_CheckExact(input_obj)) {
|
||||
if (memcmp(PyBytes_AS_STRING(input_obj), output_start, inlen) == 0) {
|
||||
Py_SETREF(result, Py_NewRef(input_obj));
|
||||
}
|
||||
}
|
||||
PyBuffer_Release(&del_table_view);
|
||||
PyBuffer_Release(&table_view);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue