gh-129813, PEP 782: Remove the private _PyBytesWriter API (#139264)

It is now replaced with the new public PyBytesWriter API (PEP 782).
This commit is contained in:
Victor Stinner 2025-09-23 17:29:55 +02:00 committed by GitHub
parent 5854cf38a2
commit dd45179fa0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 4 additions and 372 deletions

View file

@ -60,93 +60,7 @@ PyAPI_FUNC(void)
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
const char* src, Py_ssize_t len_src);
/* --- _PyBytesWriter ----------------------------------------------------- */
/* State of the legacy private bytes writer.
The _PyBytesWriter structure is big: it contains an embedded "stack buffer"
(small_buffer). A _PyBytesWriter variable must be declared at the end of the
variables of a function to optimize the memory allocation on the stack.
small_buffer must remain the last member: _PyBytesWriter_Init() zeroes every
member located before it in a single memset(). */
typedef struct {
/* bytes object, bytearray object, or NULL (when the small buffer is used) */
PyObject *buffer;
/* Number of bytes currently allocated for writing. */
Py_ssize_t allocated;
/* Minimum number of allocated bytes,
incremented by _PyBytesWriter_Prepare() */
Py_ssize_t min_size;
/* If non-zero, use a bytearray instead of a bytes object for buffer. */
int use_bytearray;
/* If non-zero, overallocate the buffer on resize (default: 0).
This flag must be zero if use_bytearray is non-zero. */
int overallocate;
/* Non-zero while the data lives in small_buffer: set by
_PyBytesWriter_Alloc(), cleared by _PyBytesWriter_Resize() once the data
has been moved into a bytes/bytearray object. */
int use_small_buffer;
/* Embedded "stack buffer" used until it becomes too small. */
char small_buffer[512];
} _PyBytesWriter;
/* Initialize a bytes writer.
Every member located before small_buffer is set to zero, so the writer
starts without a buffer object, in bytes (not bytearray) mode and with
overallocation disabled. Set the overallocate attribute after this call to
control the allocation of the buffer.
Export _PyBytesWriter API for '_pickle' shared extension. */
PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer);
/* Get the buffer content and release the writer's buffer object.
str is the current position inside the buffer: the result holds the bytes
written between the start of the buffer and str.
Return a bytes object, or a bytearray object if use_bytearray is non-zero.
Raise an exception and return NULL on error. */
PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer,
void *str);
/* Deallocate memory of a writer: drop the reference to its internal buffer
object (if any) and set the buffer member to NULL. */
PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer);
/* Allocate the buffer to write size bytes.
Must be called only once per writer, before any byte is written (checked by
an assertion in debug builds). The writer starts on its embedded small
buffer; a bytes/bytearray object is only created when size does not fit.
Return the pointer to the beginning of buffer data.
Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer,
Py_ssize_t size);
/* Ensure that the buffer is large enough to write *size* more bytes.
Add size to the writer minimum size (min_size attribute) and resize the
buffer if the new minimum size exceeds the allocated size.
str is the current pointer inside the buffer.
Return the updated current pointer inside the buffer (str itself when size
is zero).
Raise an exception and return NULL on error; the writer's buffer object is
released in that case. */
PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer,
void *str,
Py_ssize_t size);
/* Resize the buffer to make it larger.
The new buffer may be larger than size bytes because of overallocation.
str is the current pointer inside the buffer.
Return the updated current pointer inside the buffer.
Raise an exception and return NULL on error; the writer's buffer object is
released in that case.
Note: size must be greater than the number of allocated bytes in the writer.
This function doesn't use the writer minimum size (min_size attribute).
See also _PyBytesWriter_Prepare().
*/
PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer,
void *str,
Py_ssize_t size);
/* Write bytes.
Make room for size bytes with _PyBytesWriter_Prepare(), then copy size bytes
from bytes at the current position str.
Return the updated current pointer, located just after the copied bytes.
Raise an exception and return NULL on error. */
PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
void *str,
const void *bytes,
Py_ssize_t size);
// Export for '_testcapi' shared extension.
// NOTE(review): the definition is not visible from this header; presumably it
// creates a PyBytesWriter whose result is a bytearray of `size` bytes --
// confirm against the implementation.
PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray(
Py_ssize_t size);
/* --- PyBytesWriter ------------------------------------------------------ */
struct PyBytesWriter {
char small_buffer[256];
@ -156,6 +70,9 @@ struct PyBytesWriter {
int overallocate;
};
// Export for '_testcapi' shared extension.
// NOTE(review): the definition is not visible from this header; presumably it
// creates a PyBytesWriter whose result is a bytearray of `size` bytes --
// confirm against the implementation.
PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray(Py_ssize_t size);
#ifdef __cplusplus
}
#endif

View file

@ -34,8 +34,6 @@ class bytes "PyBytesObject *" "&PyBytes_Type"
#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
/* Forward declarations */
/* Legacy private writer: number of bytes between the start of the writer
buffer and str. */
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
char *str);
/* Helpers of the public PyBytesWriter API.
NOTE(review): their definitions are outside this view; see the definitions
for the exact contract. */
static void* _PyBytesWriter_ResizeAndUpdatePointer(PyBytesWriter *writer,
Py_ssize_t size, void *data);
static Py_ssize_t _PyBytesWriter_GetAllocated(PyBytesWriter *writer);
@ -3453,288 +3451,6 @@ bytes_iter(PyObject *seq)
}
/* _PyBytesWriter API */
/* Divisor used when overallocating: a buffer grown to N bytes gets
   N / OVERALLOCATE_FACTOR bytes of extra room. */
#if defined(MS_WINDOWS)
   /* Windows: 50% of extra room is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* Other platforms (tuned on Linux): 25% of extra room is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
/* Put a writer in its initial state.
   Only the members stored before small_buffer are cleared, which avoids
   touching the large embedded array in release builds. */
void
_PyBytesWriter_Init(_PyBytesWriter *writer)
{
    const size_t header_size = offsetof(_PyBytesWriter, small_buffer);

    memset(writer, 0, header_size);

#ifndef NDEBUG
    /* Debug builds: fill the embedded array with the "clean memory" marker
       byte so that reads of unwritten data are easier to spot. */
    memset(writer->small_buffer,
           PYMEM_CLEANBYTE,
           sizeof(writer->small_buffer));
#endif
}
/* Release the buffer object owned by the writer, if any.
Py_CLEAR() also resets writer->buffer to NULL, so the function is safe to
call on a writer which has no buffer object (small buffer in use, or already
deallocated). */
void
_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
{
Py_CLEAR(writer->buffer);
}
/* Return the address of the first byte of the storage currently used by
   the writer: the embedded small buffer, or the data of the bytearray/bytes
   object held in writer->buffer. */
Py_LOCAL_INLINE(char*)
_PyBytesWriter_AsString(_PyBytesWriter *writer)
{
    if (writer->use_small_buffer) {
        /* No object is attached while the embedded array is in use. */
        assert(writer->buffer == NULL);
        return writer->small_buffer;
    }

    assert(writer->buffer != NULL);
    return writer->use_bytearray
        ? PyByteArray_AS_STRING(writer->buffer)
        : PyBytes_AS_STRING(writer->buffer);
}
/* Return the number of bytes written so far, i.e. the distance between the
   start of the writer storage and the current position str. */
Py_LOCAL_INLINE(Py_ssize_t)
_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
{
    const char *base = _PyBytesWriter_AsString(writer);

    assert(str != NULL);
    assert(str >= base);

    const Py_ssize_t used = str - base;
    /* The position can never be past the allocated area. */
    assert(used <= writer->allocated);
    return used;
}
#ifndef NDEBUG
/* Debug-only invariant checker for a writer and its current position str.
   Every check is an assertion; the function always returns 1 so that the
   call itself can be wrapped in assert(). */
Py_LOCAL_INLINE(int)
_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
{
    /* Storage: either the embedded array (no object attached), or an
       object of the exact expected type with a reference count of 1. */
    if (writer->use_small_buffer) {
        assert(writer->buffer == NULL);
    }
    else {
        assert(writer->buffer != NULL);
        if (writer->use_bytearray) {
            assert(PyByteArray_CheckExact(writer->buffer));
        }
        else {
            assert(PyBytes_CheckExact(writer->buffer));
        }
        assert(Py_REFCNT(writer->buffer) == 1);
    }

    /* bytearray has its own overallocation algorithm, so the writer
       overallocation must be disabled in bytearray mode */
    assert(!(writer->use_bytearray && writer->overallocate));

    /* Sizes: 0 <= min_size <= allocated */
    assert(writer->allocated >= 0);
    assert(writer->min_size >= 0);
    assert(writer->min_size <= writer->allocated);

    /* The byte following the allocated area must always be null */
    const char *base = _PyBytesWriter_AsString(writer);
    assert(base[writer->allocated] == 0);

    /* The current position must lie inside [base, base + allocated] */
    const char *limit = base + writer->allocated;
    assert(str != NULL);
    assert(base <= str);
    assert(str <= limit);
    return 1;
}
#endif
/* Grow the writer buffer so that it can hold at least size bytes.
str is the current position inside the buffer; the function returns the same
logical position inside the (possibly moved) buffer.
If writer->overallocate is set, size / OVERALLOCATE_FACTOR extra bytes are
requested, unless that addition would overflow Py_ssize_t.
When the writer still uses its embedded small buffer, a new bytes or
bytearray object is created and the bytes already written are copied into it.
Precondition (asserted): size is greater than writer->allocated.
On error, the writer buffer is released with _PyBytesWriter_Dealloc() and
NULL is returned. */
void*
_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
{
Py_ssize_t allocated, pos;
assert(_PyBytesWriter_CheckConsistency(writer, str));
assert(writer->allocated < size);
allocated = size;
/* The second operand guards the addition below against overflow: when the
extra room does not fit in Py_ssize_t, exactly size bytes are requested. */
if (writer->overallocate
&& allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
/* overallocate to limit the number of realloc() */
allocated += allocated / OVERALLOCATE_FACTOR;
}
/* Remember the write offset now: the storage may move below, which would
invalidate str. */
pos = _PyBytesWriter_GetSize(writer, str);
if (!writer->use_small_buffer) {
/* The data already lives in an object: resize it. */
if (writer->use_bytearray) {
if (PyByteArray_Resize(writer->buffer, allocated))
goto error;
/* writer->allocated can be smaller than writer->buffer->ob_alloc,
but we cannot use ob_alloc because bytes may need to be moved
to use the whole buffer. bytearray uses an internal optimization
to avoid moving or copying bytes when bytes are removed at the
beginning (ex: del bytearray[:1]). */
}
else {
if (_PyBytes_Resize(&writer->buffer, allocated))
goto error;
}
}
else {
/* convert from stack buffer to bytes (or bytearray) object buffer */
assert(writer->buffer == NULL);
if (writer->use_bytearray)
writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
else
writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
if (writer->buffer == NULL)
goto error;
/* Copy the bytes written so far from the small buffer to the new object */
if (pos != 0) {
char *dest;
if (writer->use_bytearray)
dest = PyByteArray_AS_STRING(writer->buffer);
else
dest = PyBytes_AS_STRING(writer->buffer);
memcpy(dest,
writer->small_buffer,
pos);
}
writer->use_small_buffer = 0;
#ifndef NDEBUG
/* Debug builds: overwrite the now unused small buffer with the marker byte */
memset(writer->small_buffer, PYMEM_CLEANBYTE,
sizeof(writer->small_buffer));
#endif
}
writer->allocated = allocated;
/* Rebuild the current position from the saved offset */
str = _PyBytesWriter_AsString(writer) + pos;
assert(_PyBytesWriter_CheckConsistency(writer, str));
return str;
error:
/* Release whatever buffer object the writer still owns */
_PyBytesWriter_Dealloc(writer);
return NULL;
}
/* Reserve room for size additional bytes.
   The writer minimum size grows by size, and the buffer is resized when
   the new minimum exceeds the allocated size.
   Return the (possibly relocated) current position, or NULL with the
   writer buffer released on error. */
void*
_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
{
    assert(_PyBytesWriter_CheckConsistency(writer, str));
    assert(size >= 0);

    if (size != 0) {
        /* min_size + size must not overflow Py_ssize_t */
        if (writer->min_size > PY_SSIZE_T_MAX - size) {
            PyErr_NoMemory();
            _PyBytesWriter_Dealloc(writer);
            return NULL;
        }

        const Py_ssize_t required = writer->min_size + size;
        if (required > writer->allocated) {
            str = _PyBytesWriter_Resize(writer, str, required);
        }
        writer->min_size = required;
    }
    /* size == 0: nothing to reserve, the position is unchanged */
    return str;
}
/* First allocation of a writer: start on the embedded small buffer and
   reserve size bytes.
   Return the address of the first byte of the buffer data, or NULL with
   an exception set on error. */
void*
_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
{
    /* _PyBytesWriter_Alloc() must only be called once per writer */
    assert(writer->min_size == 0 && writer->buffer == NULL);
    assert(size >= 0);

    writer->use_small_buffer = 1;

#ifdef NDEBUG
    writer->allocated = sizeof(writer->small_buffer);
#else
    /* Debug mode: only 10 bytes of the small buffer are used. bytes and
       bytearray objects are better than the small buffer at detecting
       buffer underflow and overflow, and a tiny small buffer still lets the
       tests exercise the small buffer code path.
       The _PyBytesWriter structure itself keeps its full size (more than
       512 bytes) in debug mode, so that tests can still detect a stack
       overflow when _Py_EnterRecursiveCall() is not used in a deep C
       callback. */
    const Py_ssize_t usable = sizeof(writer->small_buffer) - 1;
    writer->allocated = Py_MIN(usable, 10);

    /* _PyBytesWriter_CheckConsistency() detects buffer overflow by
       requiring the byte after the allocated area to be 0 */
    writer->small_buffer[writer->allocated] = 0;
#endif

    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
}
/* Build the final object from the bytes written between the start of the
   buffer and str.
   - Empty result in bytes mode: the empty bytes singleton.
   - Small buffer in use: a new bytes/bytearray object copied from it.
   - Otherwise: the writer buffer object itself, shrunk to the written size.
   Return NULL on error. */
PyObject *
_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
{
    assert(_PyBytesWriter_CheckConsistency(writer, str));

    const Py_ssize_t size = _PyBytesWriter_GetSize(writer, str);

    if (size == 0 && !writer->use_bytearray) {
        Py_CLEAR(writer->buffer);
        /* Get the empty byte string singleton */
        return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
    }

    if (writer->use_small_buffer) {
        if (writer->use_bytearray) {
            return PyByteArray_FromStringAndSize(writer->small_buffer, size);
        }
        return PyBytes_FromStringAndSize(writer->small_buffer, size);
    }

    /* Take ownership of the buffer object away from the writer */
    PyObject *result = writer->buffer;
    writer->buffer = NULL;

    if (size == writer->allocated) {
        /* Already the right size */
        return result;
    }

    if (writer->use_bytearray) {
        if (PyByteArray_Resize(result, size)) {
            Py_DECREF(result);
            return NULL;
        }
    }
    else if (_PyBytes_Resize(&result, size)) {
        /* _PyBytes_Resize() is expected to have cleared result on failure */
        assert(result == NULL);
        return NULL;
    }
    return result;
}
/* Append size bytes read from bytes at the current position ptr.
   Room is first reserved with _PyBytesWriter_Prepare(), which may move the
   buffer.
   Return the updated current position, located just after the copied
   bytes (the unchanged position when size is 0).
   Return NULL on error; the writer buffer has been released in that case. */
void*
_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
                          const void *bytes, Py_ssize_t size)
{
    char *str = _PyBytesWriter_Prepare(writer, ptr, size);
    if (str == NULL) {
        return NULL;
    }

    /* Skip the copy for an empty write: calling memcpy() with a NULL source
       pointer is undefined behavior even when the length is 0, and callers
       may legitimately pass bytes=NULL together with size=0. */
    if (size > 0) {
        memcpy(str, bytes, size);
        str += size;
    }
    return str;
}
void
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
const char* src, Py_ssize_t len_src)
@ -3799,7 +3515,6 @@ byteswriter_allocated(PyBytesWriter *writer)
# define OVERALLOCATE_FACTOR 4
#endif
static inline int
byteswriter_resize(PyBytesWriter *writer, Py_ssize_t size, int resize)
{