mirror of
https://github.com/python/cpython.git
synced 2025-10-19 07:53:46 +00:00
gh-139877: Use PyBytesWriter in pycore_blocks_output_buffer.h (#139976)
Previously, the _BlocksOutputBuffer code created a list of bytes objects to hold the output data from compression libraries. This was slow because the output buffer code had to copy each bytes element of the list into the final bytes object buffer at the end of compression. The new PyBytesWriter API introduced in PEP 782 is an ergonomic and fast method of writing data into a buffer that will later turn into a bytes object. Benchmarks show that using the PyBytesWriter API is 10-30% faster for decompression across a variety of settings. The performance gains are greatest when the decompressor is very performant, such as for Zstandard (and likely zlib-ng). Otherwise, the decompressor can bottleneck decompression and the gains are more modest, but still sizable (e.g. 10% faster for zlib)! Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
404425575c
commit
f262297d52
7 changed files with 49 additions and 109 deletions
|
@ -45,12 +45,14 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
typedef struct {
|
||||
// List of bytes objects
|
||||
PyObject *list;
|
||||
// Bytes writer managing output buffer
|
||||
PyBytesWriter *writer;
|
||||
// Number of whole allocated size
|
||||
Py_ssize_t allocated;
|
||||
// Max length of the buffer, negative number means unlimited length.
|
||||
// Max length of the buffer, negative number means unlimited length
|
||||
Py_ssize_t max_length;
|
||||
// Number of blocks of bytes. Used to calculate next allocation size
|
||||
size_t num_blocks;
|
||||
} _BlocksOutputBuffer;
|
||||
|
||||
static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
|
||||
|
@ -107,11 +109,10 @@ _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
|
|||
const Py_ssize_t max_length,
|
||||
void **next_out)
|
||||
{
|
||||
PyObject *b;
|
||||
Py_ssize_t block_size;
|
||||
|
||||
// ensure .list was set to NULL
|
||||
assert(buffer->list == NULL);
|
||||
// ensure .writer was set to NULL
|
||||
assert(buffer->writer == NULL);
|
||||
|
||||
// get block size
|
||||
if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
|
||||
|
@ -120,25 +121,17 @@ _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
|
|||
block_size = BUFFER_BLOCK_SIZE[0];
|
||||
}
|
||||
|
||||
// the first block
|
||||
b = PyBytes_FromStringAndSize(NULL, block_size);
|
||||
if (b == NULL) {
|
||||
buffer->writer = PyBytesWriter_Create(block_size);
|
||||
if (buffer->writer == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// create the list
|
||||
buffer->list = PyList_New(1);
|
||||
if (buffer->list == NULL) {
|
||||
Py_DECREF(b);
|
||||
return -1;
|
||||
}
|
||||
PyList_SET_ITEM(buffer->list, 0, b);
|
||||
|
||||
// set variables
|
||||
buffer->allocated = block_size;
|
||||
buffer->max_length = max_length;
|
||||
buffer->num_blocks = 1;
|
||||
|
||||
*next_out = PyBytes_AS_STRING(b);
|
||||
*next_out = PyBytesWriter_GetData(buffer->writer);
|
||||
return block_size;
|
||||
}
|
||||
|
||||
|
@ -155,31 +148,21 @@ _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
|
|||
const Py_ssize_t init_size,
|
||||
void **next_out)
|
||||
{
|
||||
PyObject *b;
|
||||
|
||||
// ensure .list was set to NULL
|
||||
assert(buffer->list == NULL);
|
||||
// ensure .writer was set to NULL
|
||||
assert(buffer->writer == NULL);
|
||||
|
||||
// the first block
|
||||
b = PyBytes_FromStringAndSize(NULL, init_size);
|
||||
if (b == NULL) {
|
||||
PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
|
||||
buffer->writer = PyBytesWriter_Create(init_size);
|
||||
if (buffer->writer == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// create the list
|
||||
buffer->list = PyList_New(1);
|
||||
if (buffer->list == NULL) {
|
||||
Py_DECREF(b);
|
||||
return -1;
|
||||
}
|
||||
PyList_SET_ITEM(buffer->list, 0, b);
|
||||
|
||||
// set variables
|
||||
buffer->allocated = init_size;
|
||||
buffer->max_length = -1;
|
||||
buffer->num_blocks = 1;
|
||||
|
||||
*next_out = PyBytes_AS_STRING(b);
|
||||
*next_out = PyBytesWriter_GetData(buffer->writer);
|
||||
return init_size;
|
||||
}
|
||||
|
||||
|
@ -193,8 +176,6 @@ _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
|
|||
void **next_out,
|
||||
const Py_ssize_t avail_out)
|
||||
{
|
||||
PyObject *b;
|
||||
const Py_ssize_t list_len = Py_SIZE(buffer->list);
|
||||
Py_ssize_t block_size;
|
||||
|
||||
// ensure no gaps in the data
|
||||
|
@ -205,11 +186,10 @@ _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
|
|||
}
|
||||
|
||||
// get block size
|
||||
if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
|
||||
block_size = BUFFER_BLOCK_SIZE[list_len];
|
||||
} else {
|
||||
block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
|
||||
}
|
||||
size_t maxblock = Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE);
|
||||
assert(maxblock >= 1);
|
||||
size_t block_index = Py_MIN(buffer->num_blocks, maxblock - 1);
|
||||
block_size = BUFFER_BLOCK_SIZE[block_index];
|
||||
|
||||
// check max_length
|
||||
if (buffer->max_length >= 0) {
|
||||
|
@ -229,22 +209,19 @@ _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
|
|||
return -1;
|
||||
}
|
||||
|
||||
// create the block
|
||||
b = PyBytes_FromStringAndSize(NULL, block_size);
|
||||
if (b == NULL) {
|
||||
if (PyBytesWriter_Grow(buffer->writer, block_size)) {
|
||||
PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
|
||||
return -1;
|
||||
}
|
||||
if (PyList_Append(buffer->list, b) < 0) {
|
||||
Py_DECREF(b);
|
||||
return -1;
|
||||
}
|
||||
Py_DECREF(b);
|
||||
|
||||
Py_ssize_t current_size = buffer->allocated;
|
||||
|
||||
// set variables
|
||||
buffer->allocated += block_size;
|
||||
buffer->num_blocks += 1;
|
||||
|
||||
*next_out = PyBytes_AS_STRING(b);
|
||||
char *data = PyBytesWriter_GetData(buffer->writer);
|
||||
*next_out = data + current_size;
|
||||
return block_size;
|
||||
}
|
||||
|
||||
|
@ -265,54 +242,17 @@ static inline PyObject *
|
|||
_BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
|
||||
const Py_ssize_t avail_out)
|
||||
{
|
||||
PyObject *result, *block;
|
||||
const Py_ssize_t list_len = Py_SIZE(buffer->list);
|
||||
|
||||
// fast path for single block
|
||||
if ((list_len == 1 && avail_out == 0) ||
|
||||
(list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
|
||||
{
|
||||
block = PyList_GET_ITEM(buffer->list, 0);
|
||||
Py_INCREF(block);
|
||||
|
||||
Py_CLEAR(buffer->list);
|
||||
return block;
|
||||
}
|
||||
|
||||
// final bytes object
|
||||
result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
|
||||
if (result == NULL) {
|
||||
PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// memory copy
|
||||
if (list_len > 0) {
|
||||
char *posi = PyBytes_AS_STRING(result);
|
||||
|
||||
// blocks except the last one
|
||||
Py_ssize_t i = 0;
|
||||
for (; i < list_len-1; i++) {
|
||||
block = PyList_GET_ITEM(buffer->list, i);
|
||||
memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
|
||||
posi += Py_SIZE(block);
|
||||
}
|
||||
// the last block
|
||||
block = PyList_GET_ITEM(buffer->list, i);
|
||||
memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
|
||||
} else {
|
||||
assert(Py_SIZE(result) == 0);
|
||||
}
|
||||
|
||||
Py_CLEAR(buffer->list);
|
||||
return result;
|
||||
assert(buffer->writer != NULL);
|
||||
return PyBytesWriter_FinishWithSize(buffer->writer,
|
||||
buffer->allocated - avail_out);
|
||||
}
|
||||
|
||||
/* Clean up the buffer when an error occurred. */
|
||||
static inline void
|
||||
_BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
|
||||
{
|
||||
Py_CLEAR(buffer->list);
|
||||
PyBytesWriter_Discard(buffer->writer);
|
||||
buffer->writer = NULL;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -190,7 +190,7 @@ static PyObject *
|
|||
compress(BZ2Compressor *c, char *data, size_t len, int action)
|
||||
{
|
||||
PyObject *result;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
|
||||
if (OutputBuffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
|
||||
goto error;
|
||||
|
@ -429,7 +429,7 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
|
|||
compare against max_length and PyBytes_GET_SIZE we declare it as
|
||||
signed */
|
||||
PyObject *result;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
bz_stream *bzs = &d->bzs;
|
||||
|
||||
if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
|
||||
|
|
|
@ -554,7 +554,7 @@ static PyObject *
|
|||
compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
|
||||
{
|
||||
PyObject *result;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
_lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
|
||||
assert(state != NULL);
|
||||
|
||||
|
@ -940,7 +940,7 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
|
|||
{
|
||||
PyObject *result;
|
||||
lzma_stream *lzs = &d->lzs;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
_lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
|
||||
assert(state != NULL);
|
||||
|
||||
|
|
|
@ -16,8 +16,8 @@ static inline int
|
|||
_OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
|
||||
Py_ssize_t max_length)
|
||||
{
|
||||
/* Ensure .list was set to NULL */
|
||||
assert(buffer->list == NULL);
|
||||
/* Ensure .writer was set to NULL */
|
||||
assert(buffer->writer == NULL);
|
||||
|
||||
Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length,
|
||||
&ob->dst);
|
||||
|
@ -39,8 +39,8 @@ _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
|
|||
{
|
||||
Py_ssize_t block_size;
|
||||
|
||||
/* Ensure .list was set to NULL */
|
||||
assert(buffer->list == NULL);
|
||||
/* Ensure .writer was set to NULL */
|
||||
assert(buffer->writer == NULL);
|
||||
|
||||
/* Get block size */
|
||||
if (0 <= max_length && max_length < init_size) {
|
||||
|
|
|
@ -446,7 +446,7 @@ compress_lock_held(ZstdCompressor *self, Py_buffer *data,
|
|||
assert(PyMutex_IsLocked(&self->lock));
|
||||
ZSTD_inBuffer in;
|
||||
ZSTD_outBuffer out;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
size_t zstd_ret;
|
||||
PyObject *ret;
|
||||
|
||||
|
@ -527,7 +527,7 @@ compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data)
|
|||
assert(PyMutex_IsLocked(&self->lock));
|
||||
ZSTD_inBuffer in;
|
||||
ZSTD_outBuffer out;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
size_t zstd_ret;
|
||||
PyObject *ret;
|
||||
|
||||
|
|
|
@ -216,7 +216,7 @@ decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in,
|
|||
{
|
||||
size_t zstd_ret;
|
||||
ZSTD_outBuffer out;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
PyObject *ret;
|
||||
|
||||
/* Initialize the output buffer */
|
||||
|
|
|
@ -344,7 +344,7 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits)
|
|||
PyObject *return_value;
|
||||
int flush;
|
||||
z_stream zst;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
|
||||
zlibstate *state = get_zlib_state(module);
|
||||
|
||||
|
@ -445,7 +445,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
|
|||
Py_ssize_t ibuflen;
|
||||
int err, flush;
|
||||
z_stream zst;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
_Uint32Window window; // output buffer's UINT32_MAX sliding window
|
||||
|
||||
zlibstate *state = get_zlib_state(module);
|
||||
|
@ -774,7 +774,7 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
|
|||
{
|
||||
PyObject *return_value;
|
||||
int err;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
zlibstate *state = PyType_GetModuleState(cls);
|
||||
|
||||
ENTER_ZLIB(self);
|
||||
|
@ -898,7 +898,7 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
|
|||
int err = Z_OK;
|
||||
Py_ssize_t ibuflen;
|
||||
PyObject *return_value;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
|
||||
PyObject *module = PyType_GetModule(cls);
|
||||
if (module == NULL)
|
||||
|
@ -1005,7 +1005,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
|
|||
{
|
||||
int err;
|
||||
PyObject *return_value;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
|
||||
zlibstate *state = PyType_GetModuleState(cls);
|
||||
/* Flushing with Z_NO_FLUSH is a no-op, so there's no point in
|
||||
|
@ -1267,7 +1267,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
|
|||
Py_buffer data;
|
||||
PyObject *return_value;
|
||||
Py_ssize_t ibuflen;
|
||||
_BlocksOutputBuffer buffer = {.list = NULL};
|
||||
_BlocksOutputBuffer buffer = {.writer = NULL};
|
||||
_Uint32Window window; // output buffer's UINT32_MAX sliding window
|
||||
|
||||
PyObject *module = PyType_GetModule(cls);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue