gh-139353: Add Objects/unicode_writer.c file (#139911)

Move the public PyUnicodeWriter API and the private _PyUnicodeWriter
API to a new Objects/unicode_writer.c file.

Rename a few helper functions, such as resize_compact() and unicode_result(),
so that they can be shared between unicodeobject.c and unicode_writer.c.
This commit is contained in:
Victor Stinner 2025-10-30 14:36:15 +01:00 committed by GitHub
parent 75a1cbdd38
commit efc37ba49e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 717 additions and 638 deletions

View file

@ -17,6 +17,46 @@ extern "C" {
/* Internal Unicode helpers declared here so that more than one
   translation unit can call them.  The definitions are not part of this
   header. */

extern int _PyUnicode_IsModifiable(PyObject *unicode);

extern void _PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer,
                                            PyObject *buffer);

/* NOTE(review): presumably the exported form of the former unicode_result()
   helper -- confirm against the definition. */
extern PyObject *_PyUnicode_Result(PyObject *unicode);

/* NOTE(review): judging by the name and parameters, decodes `size` bytes
   of `s` into `writer`; `consumed` looks like an optional out-parameter.
   Confirm against the definition. */
extern int _PyUnicode_DecodeUTF8Writer(_PyUnicodeWriter *writer,
                                       const char *s,
                                       Py_ssize_t size,
                                       _Py_error_handler error_handler,
                                       const char *errors,
                                       Py_ssize_t *consumed);

/* NOTE(review): presumably the exported form of the former resize_compact()
   helper -- confirm against the definition. */
extern PyObject *_PyUnicode_ResizeCompact(PyObject *unicode,
                                          Py_ssize_t length);

extern PyObject *_PyUnicode_GetEmpty(void);
/* Copy a run of characters while changing their storage type.

   from_type / to_type  element types of the source and destination;
                        both must be valid type names.
   begin, end           delimit the source run [begin, end); they are
                        cast to "const from_type *".
   to                   start of the destination buffer; it is cast to
                        "to_type *".

   Every element is converted with a plain C cast to to_type.  Each macro
   argument is evaluated exactly once.  The bulk of the run is copied four
   elements per iteration; a second loop copies whatever is left.

   Py_ssize_t and _Py_SIZE_ROUND_DOWN must be visible where the macro is
   expanded. */
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to)    \
    do {                                                                \
        to_type *_to = (to_type *)(to);                                 \
        const from_type *_iter = (const from_type *)(begin);            \
        const from_type *_end = (const from_type *)(end);               \
        Py_ssize_t n = _end - _iter;                                    \
        const from_type *_unrolled_end =                                \
            _iter + _Py_SIZE_ROUND_DOWN(n, 4);                          \
        for (; _iter < _unrolled_end; _iter += 4, _to += 4) {           \
            _to[0] = (to_type) _iter[0];                                \
            _to[1] = (to_type) _iter[1];                                \
            _to[2] = (to_type) _iter[2];                                \
            _to[3] = (to_type) _iter[3];                                \
        }                                                               \
        for (; _iter < _end; ++_iter, ++_to) {                          \
            *_to = (to_type) *_iter;                                    \
        }                                                               \
    } while (0)
static inline void