gh-139871: Optimize small takes in bytearray.take_bytes (GH-141741)

When less than half of the buffer is taken, just copy that small part out
rather than doing a big allocation + memmove + big shrink.
This commit is contained in:
Cody Maloney 2025-11-19 23:49:05 -08:00 committed by GitHub
parent a35c683da5
commit e265ce8a56
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 39 additions and 1 deletions

View file

@ -1524,6 +1524,32 @@ def test_take_bytes(self):
self.assertRaises(BufferError, ba.take_bytes)
self.assertEqual(ba.take_bytes(), b'abc')
@support.cpython_only  # tests an implementation detail
def test_take_bytes_optimization(self):
    # Pulling many small chunks off the front of a much larger buffer
    # should only copy the small chunk out, rather than moving the large
    # remainder around; the allocation is expected to stay untouched.
    filler_len = 1000
    ba = bytearray(b'abcdef' + b'0' * filler_len)
    start_alloc = ba.__alloc__()

    # Two bytes per take; __alloc__() must not change along the way.
    # Each entry is (expected chunk, prefix bytes left ahead of the filler).
    small_takes = ((b'ab', 4), (b'cd', 2), (b'ef', 0))
    for chunk, prefix_left in small_takes:
        self.assertEqual(ba.take_bytes(2), chunk)
        self.assertEqual(ba.__alloc__(), start_alloc)
        self.assertEqual(len(ba), prefix_left + filler_len)
    # Still the original allocation once all the small takes are done.
    self.assertEqual(ba.__alloc__(), start_alloc)

    # Taking more than half: the allocation shrinks to the exact size.
    self.assertEqual(ba.take_bytes(501), b'0' * 501)
    self.assertEqual(len(ba), 499)
    bytes_header_size = sys.getsizeof(b'')
    self.assertEqual(ba.__alloc__(), 499 + bytes_header_size)
def test_setitem(self):
def setitem_as_mapping(b, i, val):
b[i] = val

View file

@ -1547,8 +1547,20 @@ bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n)
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
}
// Copy remaining bytes to a new bytes.
Py_ssize_t remaining_length = size - to_take;
// optimization: If taking less than leaving, just copy the small to_take
// portion out and move ob_start.
if (to_take < remaining_length) {
PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take);
if (ret == NULL) {
return NULL;
}
self->ob_start += to_take;
Py_SET_SIZE(self, remaining_length);
return ret;
}
// Copy remaining bytes to a new bytes.
PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
remaining_length);
if (remaining == NULL) {