mirror of
https://github.com/python/cpython.git
synced 2025-10-24 18:33:49 +00:00
gh-129813, PEP 782: Use PyBytesWriter in utf8_encoder() (#138874)
Replace the private _PyBytesWriter API with the new public PyBytesWriter API in utf8_encoder() and unicode_encode_ucs1().
This commit is contained in:
parent
49e83e31bd
commit
8cfd7b4ecf
2 changed files with 98 additions and 77 deletions
|
|
@ -828,7 +828,7 @@ unicode_result_unchanged(PyObject *unicode)
|
|||
/* Implementation of the "backslashreplace" error handler for 8-bit encodings:
|
||||
ASCII, Latin1, UTF-8, etc. */
|
||||
static char*
|
||||
backslashreplace(_PyBytesWriter *writer, char *str,
|
||||
backslashreplace(PyBytesWriter *writer, char *str,
|
||||
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
|
||||
{
|
||||
Py_ssize_t size, i;
|
||||
|
|
@ -861,9 +861,10 @@ backslashreplace(_PyBytesWriter *writer, char *str,
|
|||
size += incr;
|
||||
}
|
||||
|
||||
str = _PyBytesWriter_Prepare(writer, str, size);
|
||||
if (str == NULL)
|
||||
str = PyBytesWriter_GrowAndUpdatePointer(writer, size, str);
|
||||
if (str == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* generate replacement */
|
||||
for (i = collstart; i < collend; ++i) {
|
||||
|
|
@ -894,7 +895,7 @@ backslashreplace(_PyBytesWriter *writer, char *str,
|
|||
/* Implementation of the "xmlcharrefreplace" error handler for 8-bit encodings:
|
||||
ASCII, Latin1, UTF-8, etc. */
|
||||
static char*
|
||||
xmlcharrefreplace(_PyBytesWriter *writer, char *str,
|
||||
xmlcharrefreplace(PyBytesWriter *writer, char *str,
|
||||
PyObject *unicode, Py_ssize_t collstart, Py_ssize_t collend)
|
||||
{
|
||||
Py_ssize_t size, i;
|
||||
|
|
@ -935,9 +936,10 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
|
|||
size += incr;
|
||||
}
|
||||
|
||||
str = _PyBytesWriter_Prepare(writer, str, size);
|
||||
if (str == NULL)
|
||||
str = PyBytesWriter_GrowAndUpdatePointer(writer, size, str);
|
||||
if (str == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* generate replacement */
|
||||
for (i = collstart; i < collend; ++i) {
|
||||
|
|
@ -5828,7 +5830,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
|||
const void *data = PyUnicode_DATA(unicode);
|
||||
Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
|
||||
|
||||
_PyBytesWriter writer;
|
||||
PyBytesWriter *writer;
|
||||
char *end;
|
||||
|
||||
switch (kind) {
|
||||
|
|
@ -5837,21 +5839,24 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
|||
case PyUnicode_1BYTE_KIND:
|
||||
/* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
|
||||
assert(!PyUnicode_IS_ASCII(unicode));
|
||||
end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
|
||||
writer = ucs1lib_utf8_encoder(unicode, data, size,
|
||||
error_handler, errors, &end);
|
||||
break;
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
|
||||
writer = ucs2lib_utf8_encoder(unicode, data, size,
|
||||
error_handler, errors, &end);
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
|
||||
writer = ucs4lib_utf8_encoder(unicode, data, size,
|
||||
error_handler, errors, &end);
|
||||
break;
|
||||
}
|
||||
|
||||
if (end == NULL) {
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
if (writer == NULL) {
|
||||
PyBytesWriter_Discard(writer);
|
||||
return NULL;
|
||||
}
|
||||
return _PyBytesWriter_Finish(&writer, end);
|
||||
return PyBytesWriter_FinishWithPointer(writer, end);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -5865,37 +5870,35 @@ unicode_fill_utf8(PyObject *unicode)
|
|||
const void *data = PyUnicode_DATA(unicode);
|
||||
Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
|
||||
|
||||
_PyBytesWriter writer;
|
||||
PyBytesWriter *writer;
|
||||
char *end;
|
||||
|
||||
switch (kind) {
|
||||
default:
|
||||
Py_UNREACHABLE();
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
end = ucs1lib_utf8_encoder(&writer, unicode, data, size,
|
||||
_Py_ERROR_STRICT, NULL);
|
||||
writer = ucs1lib_utf8_encoder(unicode, data, size,
|
||||
_Py_ERROR_STRICT, NULL, &end);
|
||||
break;
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
end = ucs2lib_utf8_encoder(&writer, unicode, data, size,
|
||||
_Py_ERROR_STRICT, NULL);
|
||||
writer = ucs2lib_utf8_encoder(unicode, data, size,
|
||||
_Py_ERROR_STRICT, NULL, &end);
|
||||
break;
|
||||
case PyUnicode_4BYTE_KIND:
|
||||
end = ucs4lib_utf8_encoder(&writer, unicode, data, size,
|
||||
_Py_ERROR_STRICT, NULL);
|
||||
writer = ucs4lib_utf8_encoder(unicode, data, size,
|
||||
_Py_ERROR_STRICT, NULL, &end);
|
||||
break;
|
||||
}
|
||||
if (end == NULL) {
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
if (writer == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
const char *start = writer.use_small_buffer ? writer.small_buffer :
|
||||
PyBytes_AS_STRING(writer.buffer);
|
||||
const char *start = PyBytesWriter_GetData(writer);
|
||||
Py_ssize_t len = end - start;
|
||||
|
||||
char *cache = PyMem_Malloc(len + 1);
|
||||
if (cache == NULL) {
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
PyBytesWriter_Discard(writer);
|
||||
PyErr_NoMemory();
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -5903,7 +5906,7 @@ unicode_fill_utf8(PyObject *unicode)
|
|||
cache[len] = '\0';
|
||||
PyUnicode_SET_UTF8_LENGTH(unicode, len);
|
||||
PyUnicode_SET_UTF8(unicode, cache);
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
PyBytesWriter_Discard(writer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -7323,16 +7326,12 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
Py_ssize_t pos=0, size;
|
||||
int kind;
|
||||
const void *data;
|
||||
/* pointer into the output */
|
||||
char *str;
|
||||
const char *encoding = (limit == 256) ? "latin-1" : "ascii";
|
||||
const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
|
||||
PyObject *error_handler_obj = NULL;
|
||||
PyObject *exc = NULL;
|
||||
_Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
|
||||
PyObject *rep = NULL;
|
||||
/* output object */
|
||||
_PyBytesWriter writer;
|
||||
|
||||
size = PyUnicode_GET_LENGTH(unicode);
|
||||
kind = PyUnicode_KIND(unicode);
|
||||
|
|
@ -7342,10 +7341,13 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
if (size == 0)
|
||||
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
|
||||
|
||||
_PyBytesWriter_Init(&writer);
|
||||
str = _PyBytesWriter_Alloc(&writer, size);
|
||||
if (str == NULL)
|
||||
/* output object */
|
||||
PyBytesWriter *writer = PyBytesWriter_Create(size);
|
||||
if (writer == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
/* pointer into the output */
|
||||
char *str = PyBytesWriter_GetData(writer);
|
||||
|
||||
while (pos < size) {
|
||||
Py_UCS4 ch = PyUnicode_READ(kind, data, pos);
|
||||
|
|
@ -7367,7 +7369,7 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
++collend;
|
||||
|
||||
/* Only overallocate the buffer if it's not the last write */
|
||||
writer.overallocate = (collend < size);
|
||||
writer->overallocate = (collend < size);
|
||||
|
||||
/* cache callback name lookup (if not done yet, i.e. it's the first error) */
|
||||
if (error_handler == _Py_ERROR_UNKNOWN)
|
||||
|
|
@ -7388,8 +7390,8 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
|
||||
case _Py_ERROR_BACKSLASHREPLACE:
|
||||
/* subtract preallocated bytes */
|
||||
writer.min_size -= (collend - collstart);
|
||||
str = backslashreplace(&writer, str,
|
||||
writer->size -= (collend - collstart);
|
||||
str = backslashreplace(writer, str,
|
||||
unicode, collstart, collend);
|
||||
if (str == NULL)
|
||||
goto onError;
|
||||
|
|
@ -7398,8 +7400,8 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
|
||||
case _Py_ERROR_XMLCHARREFREPLACE:
|
||||
/* subtract preallocated bytes */
|
||||
writer.min_size -= (collend - collstart);
|
||||
str = xmlcharrefreplace(&writer, str,
|
||||
writer->size -= (collend - collstart);
|
||||
str = xmlcharrefreplace(writer, str,
|
||||
unicode, collstart, collend);
|
||||
if (str == NULL)
|
||||
goto onError;
|
||||
|
|
@ -7430,24 +7432,27 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
goto onError;
|
||||
|
||||
if (newpos < collstart) {
|
||||
writer.overallocate = 1;
|
||||
str = _PyBytesWriter_Prepare(&writer, str,
|
||||
collstart - newpos);
|
||||
if (str == NULL)
|
||||
writer->overallocate = 1;
|
||||
str = PyBytesWriter_GrowAndUpdatePointer(writer,
|
||||
collstart - newpos,
|
||||
str);
|
||||
if (str == NULL) {
|
||||
goto onError;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* subtract preallocated bytes */
|
||||
writer.min_size -= newpos - collstart;
|
||||
writer->size -= newpos - collstart;
|
||||
/* Only overallocate the buffer if it's not the last write */
|
||||
writer.overallocate = (newpos < size);
|
||||
writer->overallocate = (newpos < size);
|
||||
}
|
||||
|
||||
char *rep_str;
|
||||
Py_ssize_t rep_len;
|
||||
if (PyBytes_Check(rep)) {
|
||||
/* Directly copy bytes result to output. */
|
||||
str = _PyBytesWriter_WriteBytes(&writer, str,
|
||||
PyBytes_AS_STRING(rep),
|
||||
PyBytes_GET_SIZE(rep));
|
||||
rep_str = PyBytes_AS_STRING(rep);
|
||||
rep_len = PyBytes_GET_SIZE(rep);
|
||||
}
|
||||
else {
|
||||
assert(PyUnicode_Check(rep));
|
||||
|
|
@ -7462,12 +7467,16 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
goto onError;
|
||||
}
|
||||
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
|
||||
str = _PyBytesWriter_WriteBytes(&writer, str,
|
||||
PyUnicode_DATA(rep),
|
||||
PyUnicode_GET_LENGTH(rep));
|
||||
rep_str = PyUnicode_DATA(rep);
|
||||
rep_len = PyUnicode_GET_LENGTH(rep);
|
||||
}
|
||||
if (str == NULL)
|
||||
|
||||
str = PyBytesWriter_GrowAndUpdatePointer(writer, rep_len, str);
|
||||
if (str == NULL) {
|
||||
goto onError;
|
||||
}
|
||||
memcpy(str, rep_str, rep_len);
|
||||
str += rep_len;
|
||||
|
||||
pos = newpos;
|
||||
Py_CLEAR(rep);
|
||||
|
|
@ -7475,17 +7484,17 @@ unicode_encode_ucs1(PyObject *unicode,
|
|||
|
||||
/* If overallocation was disabled, ensure that it was the last
|
||||
write. Otherwise, we missed an optimization */
|
||||
assert(writer.overallocate || pos == size);
|
||||
assert(writer->overallocate || pos == size);
|
||||
}
|
||||
}
|
||||
|
||||
Py_XDECREF(error_handler_obj);
|
||||
Py_XDECREF(exc);
|
||||
return _PyBytesWriter_Finish(&writer, str);
|
||||
return PyBytesWriter_FinishWithPointer(writer, str);
|
||||
|
||||
onError:
|
||||
Py_XDECREF(rep);
|
||||
_PyBytesWriter_Dealloc(&writer);
|
||||
PyBytesWriter_Discard(writer);
|
||||
Py_XDECREF(error_handler_obj);
|
||||
Py_XDECREF(exc);
|
||||
return NULL;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue