mirror of
https://github.com/python/cpython.git
synced 2026-01-06 07:22:09 +00:00
gh-135336: Add fast path to json string encoding (#133239)
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
7ab68cd506
commit
dec624e0af
2 changed files with 145 additions and 44 deletions
|
|
@ -0,0 +1 @@
|
|||
:mod:`json` now encodes strings up to 2.2x faster if they consist solely of characters that don’t require escaping.
|
||||
188
Modules/_json.c
188
Modules/_json.c
|
|
@ -51,7 +51,7 @@ typedef struct _PyEncoderObject {
|
|||
char sort_keys;
|
||||
char skipkeys;
|
||||
int allow_nan;
|
||||
PyCFunction fast_encode;
|
||||
int (*fast_encode)(PyUnicodeWriter *, PyObject *);
|
||||
} PyEncoderObject;
|
||||
|
||||
#define PyEncoderObject_CAST(op) ((PyEncoderObject *)(op))
|
||||
|
|
@ -102,8 +102,10 @@ static PyObject *
|
|||
_encoded_const(PyObject *obj);
|
||||
static void
|
||||
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
|
||||
static PyObject *
|
||||
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
|
||||
static int
|
||||
_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen);
|
||||
static int
|
||||
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj);
|
||||
static PyObject *
|
||||
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
|
||||
|
||||
|
|
@ -146,22 +148,11 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
|
|||
return chars;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
ascii_escape_unicode(PyObject *pystr)
|
||||
static Py_ssize_t
|
||||
ascii_escape_size(const void *input, int kind, Py_ssize_t input_chars)
|
||||
{
|
||||
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t input_chars;
|
||||
Py_ssize_t output_size;
|
||||
Py_ssize_t chars;
|
||||
PyObject *rval;
|
||||
const void *input;
|
||||
Py_UCS1 *output;
|
||||
int kind;
|
||||
|
||||
input_chars = PyUnicode_GET_LENGTH(pystr);
|
||||
input = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
|
||||
/* Compute the output size */
|
||||
for (i = 0, output_size = 2; i < input_chars; i++) {
|
||||
|
|
@ -181,11 +172,22 @@ ascii_escape_unicode(PyObject *pystr)
|
|||
}
|
||||
if (output_size > PY_SSIZE_T_MAX - d) {
|
||||
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
output_size += d;
|
||||
}
|
||||
|
||||
return output_size;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
ascii_escape_unicode_and_size(const void *input, int kind, Py_ssize_t input_chars, Py_ssize_t output_size)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t chars;
|
||||
PyObject *rval;
|
||||
Py_UCS1 *output;
|
||||
|
||||
rval = PyUnicode_New(output_size, 127);
|
||||
if (rval == NULL) {
|
||||
return NULL;
|
||||
|
|
@ -210,23 +212,62 @@ ascii_escape_unicode(PyObject *pystr)
|
|||
}
|
||||
|
||||
static PyObject *
|
||||
escape_unicode(PyObject *pystr)
|
||||
ascii_escape_unicode(PyObject *pystr)
|
||||
{
|
||||
/* Take a PyUnicode pystr and return a new ASCII-only escaped PyUnicode */
|
||||
Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
|
||||
const void *input = PyUnicode_DATA(pystr);
|
||||
int kind = PyUnicode_KIND(pystr);
|
||||
|
||||
Py_ssize_t output_size = ascii_escape_size(input, kind, input_chars);
|
||||
if (output_size < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ascii_escape_unicode_and_size(input, kind, input_chars, output_size);
|
||||
}
|
||||
|
||||
static int
|
||||
write_escaped_ascii(PyUnicodeWriter *writer, PyObject *pystr)
|
||||
{
|
||||
/* Take a PyUnicode pystr and return a new escaped PyUnicode */
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t input_chars;
|
||||
Py_ssize_t output_size;
|
||||
Py_ssize_t chars;
|
||||
PyObject *rval;
|
||||
const void *input;
|
||||
int kind;
|
||||
Py_UCS4 maxchar;
|
||||
|
||||
maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
|
||||
input_chars = PyUnicode_GET_LENGTH(pystr);
|
||||
input = PyUnicode_DATA(pystr);
|
||||
kind = PyUnicode_KIND(pystr);
|
||||
|
||||
Py_ssize_t output_size = ascii_escape_size(input, kind, input_chars);
|
||||
if (output_size < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (output_size == input_chars + 2) {
|
||||
/* No need to escape anything */
|
||||
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return PyUnicodeWriter_WriteChar(writer, '"');
|
||||
}
|
||||
|
||||
PyObject *rval = ascii_escape_unicode_and_size(input, kind, input_chars, output_size);
|
||||
if (rval == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return _steal_accumulate(writer, rval);
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
escape_size(const void *input, int kind, Py_ssize_t input_chars)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t output_size;
|
||||
|
||||
/* Compute the output size */
|
||||
for (i = 0, output_size = 2; i < input_chars; i++) {
|
||||
Py_UCS4 c = PyUnicode_READ(kind, input, i);
|
||||
|
|
@ -244,11 +285,21 @@ escape_unicode(PyObject *pystr)
|
|||
}
|
||||
if (output_size > PY_SSIZE_T_MAX - d) {
|
||||
PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
output_size += d;
|
||||
}
|
||||
|
||||
return output_size;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
escape_unicode_and_size(const void *input, int kind, Py_UCS4 maxchar, Py_ssize_t input_chars, Py_ssize_t output_size)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
Py_ssize_t chars;
|
||||
PyObject *rval;
|
||||
|
||||
rval = PyUnicode_New(output_size, maxchar);
|
||||
if (rval == NULL)
|
||||
return NULL;
|
||||
|
|
@ -303,6 +354,55 @@ escape_unicode(PyObject *pystr)
|
|||
return rval;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
escape_unicode(PyObject *pystr)
|
||||
{
|
||||
/* Take a PyUnicode pystr and return a new escaped PyUnicode */
|
||||
Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
|
||||
const void *input = PyUnicode_DATA(pystr);
|
||||
int kind = PyUnicode_KIND(pystr);
|
||||
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
|
||||
|
||||
Py_ssize_t output_size = escape_size(input, kind, input_chars);
|
||||
if (output_size < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return escape_unicode_and_size(input, kind, maxchar, input_chars, output_size);
|
||||
}
|
||||
|
||||
static int
|
||||
write_escaped_unicode(PyUnicodeWriter *writer, PyObject *pystr)
|
||||
{
|
||||
Py_ssize_t input_chars = PyUnicode_GET_LENGTH(pystr);
|
||||
const void *input = PyUnicode_DATA(pystr);
|
||||
int kind = PyUnicode_KIND(pystr);
|
||||
Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
|
||||
|
||||
Py_ssize_t output_size = escape_size(input, kind, input_chars);
|
||||
if (output_size < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (output_size == input_chars + 2) {
|
||||
/* No need to escape anything */
|
||||
if (PyUnicodeWriter_WriteChar(writer, '"') < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteStr(writer, pystr) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return PyUnicodeWriter_WriteChar(writer, '"');
|
||||
}
|
||||
|
||||
PyObject *rval = escape_unicode_and_size(input, kind, maxchar, input_chars, output_size);
|
||||
if (rval == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return _steal_accumulate(writer, rval);
|
||||
}
|
||||
|
||||
static void
|
||||
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
|
||||
{
|
||||
|
|
@ -1256,8 +1356,11 @@ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|||
|
||||
if (PyCFunction_Check(s->encoder)) {
|
||||
PyCFunction f = PyCFunction_GetFunction(s->encoder);
|
||||
if (f == py_encode_basestring_ascii || f == py_encode_basestring) {
|
||||
s->fast_encode = f;
|
||||
if (f == py_encode_basestring_ascii) {
|
||||
s->fast_encode = write_escaped_ascii;
|
||||
}
|
||||
else if (f == py_encode_basestring) {
|
||||
s->fast_encode = write_escaped_unicode;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1438,24 +1541,27 @@ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
|
|||
return PyFloat_Type.tp_repr(obj);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
|
||||
static int
|
||||
encoder_write_string(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj)
|
||||
{
|
||||
/* Return the JSON representation of a string */
|
||||
PyObject *encoded;
|
||||
|
||||
if (s->fast_encode) {
|
||||
return s->fast_encode(NULL, obj);
|
||||
return s->fast_encode(writer, obj);
|
||||
}
|
||||
encoded = PyObject_CallOneArg(s->encoder, obj);
|
||||
if (encoded != NULL && !PyUnicode_Check(encoded)) {
|
||||
if (encoded == NULL) {
|
||||
return -1;
|
||||
}
|
||||
if (!PyUnicode_Check(encoded)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"encoder() must return a string, not %.80s",
|
||||
Py_TYPE(encoded)->tp_name);
|
||||
Py_DECREF(encoded);
|
||||
return NULL;
|
||||
return -1;
|
||||
}
|
||||
return encoded;
|
||||
return _steal_accumulate(writer, encoded);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -1486,10 +1592,7 @@ encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer,
|
|||
return PyUnicodeWriter_WriteASCII(writer, "false", 5);
|
||||
}
|
||||
else if (PyUnicode_Check(obj)) {
|
||||
PyObject *encoded = encoder_encode_string(s, obj);
|
||||
if (encoded == NULL)
|
||||
return -1;
|
||||
return _steal_accumulate(writer, encoded);
|
||||
return encoder_write_string(s, writer, obj);
|
||||
}
|
||||
else if (PyLong_Check(obj)) {
|
||||
if (PyLong_CheckExact(obj)) {
|
||||
|
|
@ -1578,7 +1681,7 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
|
|||
PyObject *item_separator)
|
||||
{
|
||||
PyObject *keystr = NULL;
|
||||
PyObject *encoded;
|
||||
int rv;
|
||||
|
||||
if (PyUnicode_Check(key)) {
|
||||
keystr = Py_NewRef(key);
|
||||
|
|
@ -1624,13 +1727,10 @@ encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *firs
|
|||
}
|
||||
}
|
||||
|
||||
encoded = encoder_encode_string(s, keystr);
|
||||
rv = encoder_write_string(s, writer, keystr);
|
||||
Py_DECREF(keystr);
|
||||
if (encoded == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (_steal_accumulate(writer, encoded) < 0) {
|
||||
if (rv < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue