#ifndef Py_INTERNAL_UNICODEOBJECT_H #define Py_INTERNAL_UNICODEOBJECT_H #ifdef __cplusplus extern "C" { #endif #ifndef Py_BUILD_CORE # error "this header requires Py_BUILD_CORE define" #endif #include "pycore_fileutils.h" // _Py_error_handler #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI void _PyUnicode_ExactDealloc(PyObject *op); Py_ssize_t _PyUnicode_InternedSize(void); /* --- _PyUnicodeWriter API ----------------------------------------------- */ typedef struct { PyObject *buffer; void *data; int kind; Py_UCS4 maxchar; Py_ssize_t size; Py_ssize_t pos; /* minimum number of allocated characters (default: 0) */ Py_ssize_t min_length; /* minimum character (default: 127, ASCII) */ Py_UCS4 min_char; /* If non-zero, overallocate the buffer (default: 0). */ unsigned char overallocate; /* If readonly is 1, buffer is a shared string (cannot be modified) and size is set to 0. */ unsigned char readonly; } _PyUnicodeWriter ; /* Initialize a Unicode writer. * * By default, the minimum buffer size is 0 character and overallocation is * disabled. Set min_length, min_char and overallocate attributes to control * the allocation of the buffer. */ PyAPI_FUNC(void) _PyUnicodeWriter_Init(_PyUnicodeWriter *writer); /* Prepare the buffer to write 'length' characters with the specified maximum character. Return 0 on success, raise an exception and return -1 on error. */ #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ (((MAXCHAR) <= (WRITER)->maxchar \ && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ ? 0 \ : (((LENGTH) == 0) \ ? 0 \ : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro instead. */ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, Py_ssize_t length, Py_UCS4 maxchar); /* Prepare the buffer to have at least the kind KIND. For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will support characters in range U+000-U+FFFF. Return 0 on success, raise an exception and return -1 on error. */ #define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ ((KIND) <= (WRITER)->kind \ ? 0 \ : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() macro instead. */ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, int kind); /* Append a Unicode character. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch ); /* Append a Unicode string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str /* Unicode string */ ); /* Append a substring of a Unicode string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str, /* Unicode string */ Py_ssize_t start, Py_ssize_t end ); /* Append an ASCII-encoded byte string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, const char *str, /* ASCII-encoded byte string */ Py_ssize_t len /* number of bytes, or -1 if unknown */ ); /* Append a latin1-encoded byte string. Return 0 on success, raise an exception and return -1 on error. */ PyAPI_FUNC(int) _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, const char *str, /* latin1-encoded byte string */ Py_ssize_t len /* length in bytes */ ); /* Get the value of the writer as a Unicode string. Clear the buffer of the writer. Raise an exception and return NULL on error. */ PyAPI_FUNC(PyObject *) _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); /* Deallocate memory of a writer (clear its internal buffer). */ PyAPI_FUNC(void) _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); /* Format the object based on the format_spec, as defined in PEP 3101 (Advanced String Formatting). */ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( _PyUnicodeWriter *writer, PyObject *obj, PyObject *format_spec, Py_ssize_t start, Py_ssize_t end); /* --- Methods & Slots ---------------------------------------------------- */ /* Using explicit passed-in values, insert the thousands grouping into the string pointed to by buffer. For the argument descriptions, see Objects/stringlib/localeutil.h */ PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( _PyUnicodeWriter *writer, Py_ssize_t n_buffer, PyObject *digits, Py_ssize_t d_pos, Py_ssize_t n_digits, Py_ssize_t min_width, const char *grouping, PyObject *thousands_sep, Py_UCS4 *maxchar); /* --- Runtime lifecycle -------------------------------------------------- */ extern void _PyUnicode_InitState(PyInterpreterState *); extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *); extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *); extern void _PyUnicode_Fini(PyInterpreterState *); extern void _PyUnicode_FiniTypes(PyInterpreterState *); extern PyTypeObject _PyUnicodeASCIIIter_Type; /* --- Other API ---------------------------------------------------------- */ struct _Py_unicode_runtime_ids { PyThread_type_lock lock; // next_index value must be preserved when Py_Initialize()/Py_Finalize() // is called multiple times: see _PyUnicode_FromId() implementation. Py_ssize_t next_index; }; struct _Py_unicode_runtime_state { struct _Py_unicode_runtime_ids ids; }; /* fs_codec.encoding is initialized to NULL. Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */ struct _Py_unicode_fs_codec { char *encoding; // Filesystem encoding (encoded to UTF-8) int utf8; // encoding=="utf-8"? char *errors; // Filesystem errors (encoded to UTF-8) _Py_error_handler error_handler; }; struct _Py_unicode_ids { Py_ssize_t size; PyObject **array; }; struct _Py_unicode_state { struct _Py_unicode_fs_codec fs_codec; _PyUnicode_Name_CAPI *ucnhash_capi; // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() struct _Py_unicode_ids ids; }; extern void _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p); extern void _PyUnicode_ClearInterned(PyInterpreterState *interp); #ifdef __cplusplus } #endif #endif /* !Py_INTERNAL_UNICODEOBJECT_H */