mirror of
				https://github.com/python/cpython.git
				synced 2025-10-21 17:04:17 +00:00 
			
		
		
		
	Issue #23206: Make `json.dumps(..., ensure_ascii=False)` as fast as the default case of `ensure_ascii=True`.  Patch by Naoki Inada.
				
					
				
			This commit is contained in:
		
							parent
							
								
									2cae11e87e
								
							
						
					
					
						commit
						dc3eaa80d4
					
				
					 5 changed files with 142 additions and 7 deletions
				
			
		|  | @ -6,6 +6,10 @@ | ||||||
|     from _json import encode_basestring_ascii as c_encode_basestring_ascii |     from _json import encode_basestring_ascii as c_encode_basestring_ascii | ||||||
| except ImportError: | except ImportError: | ||||||
|     c_encode_basestring_ascii = None |     c_encode_basestring_ascii = None | ||||||
|  | try: | ||||||
|  |     from _json import encode_basestring as c_encode_basestring | ||||||
|  | except ImportError: | ||||||
|  |     c_encode_basestring = None | ||||||
| try: | try: | ||||||
|     from _json import make_encoder as c_make_encoder |     from _json import make_encoder as c_make_encoder | ||||||
| except ImportError: | except ImportError: | ||||||
|  | @ -30,7 +34,7 @@ | ||||||
| INFINITY = float('inf') | INFINITY = float('inf') | ||||||
| FLOAT_REPR = repr | FLOAT_REPR = repr | ||||||
| 
 | 
 | ||||||
| def encode_basestring(s): | def py_encode_basestring(s): | ||||||
|     """Return a JSON representation of a Python string |     """Return a JSON representation of a Python string | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
|  | @ -39,6 +43,9 @@ def replace(match): | ||||||
|     return '"' + ESCAPE.sub(replace, s) + '"' |     return '"' + ESCAPE.sub(replace, s) + '"' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | encode_basestring = (c_encode_basestring or py_encode_basestring) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def py_encode_basestring_ascii(s): | def py_encode_basestring_ascii(s): | ||||||
|     """Return an ASCII-only JSON representation of a Python string |     """Return an ASCII-only JSON representation of a Python string | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -11,9 +11,6 @@ | ||||||
|     (' s p a c e d ', '" s p a c e d "'), |     (' s p a c e d ', '" s p a c e d "'), | ||||||
|     ('\U0001d120', '"\\ud834\\udd20"'), |     ('\U0001d120', '"\\ud834\\udd20"'), | ||||||
|     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), |     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), | ||||||
|     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), |  | ||||||
|     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), |  | ||||||
|     ('\u03b1\u03a9', '"\\u03b1\\u03a9"'), |  | ||||||
|     ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), |     ("`1~!@#$%^&*()_+-={':[,]}|;.</>?", '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'), | ||||||
|     ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), |     ('\x08\x0c\n\r\t', '"\\b\\f\\n\\r\\t"'), | ||||||
|     ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), |     ('\u0123\u4567\u89ab\ucdef\uabcd\uef4a', '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'), | ||||||
|  |  | ||||||
|  | @ -626,6 +626,7 @@ Ali Ikinci | ||||||
| Aaron Iles | Aaron Iles | ||||||
| Lars Immisch | Lars Immisch | ||||||
| Bobby Impollonia | Bobby Impollonia | ||||||
|  | Naoki Inada | ||||||
| Meador Inge | Meador Inge | ||||||
| Peter Ingebretson | Peter Ingebretson | ||||||
| Tony Ingraldi | Tony Ingraldi | ||||||
|  |  | ||||||
|  | @ -203,6 +203,9 @@ Core and Builtins | ||||||
| Library | Library | ||||||
| ------- | ------- | ||||||
| 
 | 
 | ||||||
|  | - Issue #23206: Make ``json.dumps(..., ensure_ascii=False)`` as fast as the | ||||||
|  |   default case of ``ensure_ascii=True``.  Patch by Naoki Inada. | ||||||
|  | 
 | ||||||
| - Issue #23185: Add math.inf and math.nan constants. | - Issue #23185: Add math.inf and math.nan constants. | ||||||
| 
 | 
 | ||||||
| - Issue #23186: Add ssl.SSLObject.shared_ciphers() and | - Issue #23186: Add ssl.SSLObject.shared_ciphers() and | ||||||
|  |  | ||||||
							
								
								
									
										133
									
								
								Modules/_json.c
									
										
									
									
									
								
							
							
						
						
									
										133
									
								
								Modules/_json.c
									
										
									
									
									
								
							|  | @ -47,7 +47,7 @@ typedef struct _PyEncoderObject { | ||||||
|     PyObject *item_separator; |     PyObject *item_separator; | ||||||
|     PyObject *sort_keys; |     PyObject *sort_keys; | ||||||
|     PyObject *skipkeys; |     PyObject *skipkeys; | ||||||
|     int fast_encode; |     PyCFunction fast_encode; | ||||||
|     int allow_nan; |     int allow_nan; | ||||||
| } PyEncoderObject; | } PyEncoderObject; | ||||||
| 
 | 
 | ||||||
|  | @ -218,6 +218,97 @@ ascii_escape_unicode(PyObject *pystr) | ||||||
|     return rval; |     return rval; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static PyObject * | ||||||
|  | escape_unicode(PyObject *pystr) | ||||||
|  | { | ||||||
|  |     /* Take a PyUnicode pystr and return a new escaped PyUnicode */ | ||||||
|  |     Py_ssize_t i; | ||||||
|  |     Py_ssize_t input_chars; | ||||||
|  |     Py_ssize_t output_size; | ||||||
|  |     Py_ssize_t chars; | ||||||
|  |     PyObject *rval; | ||||||
|  |     void *input; | ||||||
|  |     int kind; | ||||||
|  |     Py_UCS4 maxchar; | ||||||
|  | 
 | ||||||
|  |     if (PyUnicode_READY(pystr) == -1) | ||||||
|  |         return NULL; | ||||||
|  | 
 | ||||||
|  |     maxchar = PyUnicode_MAX_CHAR_VALUE(pystr); | ||||||
|  |     input_chars = PyUnicode_GET_LENGTH(pystr); | ||||||
|  |     input = PyUnicode_DATA(pystr); | ||||||
|  |     kind = PyUnicode_KIND(pystr); | ||||||
|  | 
 | ||||||
|  |     /* Compute the output size */ | ||||||
|  |     for (i = 0, output_size = 2; i < input_chars; i++) { | ||||||
|  |         Py_UCS4 c = PyUnicode_READ(kind, input, i); | ||||||
|  |         switch (c) { | ||||||
|  |         case '\\': case '"': case '\b': case '\f': | ||||||
|  |         case '\n': case '\r': case '\t': | ||||||
|  |             output_size += 2; | ||||||
|  |             break; | ||||||
|  |         default: | ||||||
|  |             if (c <= 0x1f) | ||||||
|  |                 output_size += 6; | ||||||
|  |             else | ||||||
|  |                 output_size++; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     rval = PyUnicode_New(output_size, maxchar); | ||||||
|  |     if (rval == NULL) | ||||||
|  |         return NULL; | ||||||
|  | 
 | ||||||
|  |     kind = PyUnicode_KIND(rval); | ||||||
|  | 
 | ||||||
|  | #define ENCODE_OUTPUT do { \ | ||||||
|  |         chars = 0; \ | ||||||
|  |         output[chars++] = '"'; \ | ||||||
|  |         for (i = 0; i < input_chars; i++) { \ | ||||||
|  |             Py_UCS4 c = PyUnicode_READ(kind, input, i); \ | ||||||
|  |             switch (c) { \ | ||||||
|  |             case '\\': output[chars++] = '\\'; output[chars++] = c; break; \ | ||||||
|  |             case '"':  output[chars++] = '\\'; output[chars++] = c; break; \ | ||||||
|  |             case '\b': output[chars++] = '\\'; output[chars++] = 'b'; break; \ | ||||||
|  |             case '\f': output[chars++] = '\\'; output[chars++] = 'f'; break; \ | ||||||
|  |             case '\n': output[chars++] = '\\'; output[chars++] = 'n'; break; \ | ||||||
|  |             case '\r': output[chars++] = '\\'; output[chars++] = 'r'; break; \ | ||||||
|  |             case '\t': output[chars++] = '\\'; output[chars++] = 't'; break; \ | ||||||
|  |             default: \ | ||||||
|  |                 if (c <= 0x1f) { \ | ||||||
|  |                     output[chars++] = '\\'; \ | ||||||
|  |                     output[chars++] = 'u'; \ | ||||||
|  |                     output[chars++] = '0'; \ | ||||||
|  |                     output[chars++] = '0'; \ | ||||||
|  |                     output[chars++] = Py_hexdigits[(c >> 4) & 0xf]; \ | ||||||
|  |                     output[chars++] = Py_hexdigits[(c     ) & 0xf]; \ | ||||||
|  |                 } else { \ | ||||||
|  |                     output[chars++] = c; \ | ||||||
|  |                 } \ | ||||||
|  |             } \ | ||||||
|  |         } \ | ||||||
|  |         output[chars++] = '"'; \ | ||||||
|  |     } while (0) | ||||||
|  | 
 | ||||||
|  |     if (kind == PyUnicode_1BYTE_KIND) { | ||||||
|  |         Py_UCS1 *output = PyUnicode_1BYTE_DATA(rval); | ||||||
|  |         ENCODE_OUTPUT; | ||||||
|  |     } else if (kind == PyUnicode_2BYTE_KIND) { | ||||||
|  |         Py_UCS2 *output = PyUnicode_2BYTE_DATA(rval); | ||||||
|  |         ENCODE_OUTPUT; | ||||||
|  |     } else { | ||||||
|  |         Py_UCS4 *output = PyUnicode_4BYTE_DATA(rval); | ||||||
|  |         assert(kind == PyUnicode_4BYTE_KIND); | ||||||
|  |         ENCODE_OUTPUT; | ||||||
|  |     } | ||||||
|  | #undef ENCODE_OUTPUT | ||||||
|  | 
 | ||||||
|  | #ifdef Py_DEBUG | ||||||
|  |     assert(_PyUnicode_CheckConsistency(rval, 1)); | ||||||
|  | #endif | ||||||
|  |     return rval; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void | static void | ||||||
| raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) | raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) | ||||||
| { | { | ||||||
|  | @ -530,6 +621,31 @@ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) | ||||||
|     return rval; |     return rval; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | PyDoc_STRVAR(pydoc_encode_basestring, | ||||||
|  |     "encode_basestring(string) -> string\n" | ||||||
|  |     "\n" | ||||||
|  |     "Return a JSON representation of a Python string" | ||||||
|  | ); | ||||||
|  | 
 | ||||||
|  | static PyObject * | ||||||
|  | py_encode_basestring(PyObject* self UNUSED, PyObject *pystr) | ||||||
|  | { | ||||||
|  |     PyObject *rval; | ||||||
|  |     /* Return a JSON representation of a Python string */ | ||||||
|  |     /* METH_O */ | ||||||
|  |     if (PyUnicode_Check(pystr)) { | ||||||
|  |         rval = escape_unicode(pystr); | ||||||
|  |     } | ||||||
|  |     else { | ||||||
|  |         PyErr_Format(PyExc_TypeError, | ||||||
|  |                      "first argument must be a string, not %.80s", | ||||||
|  |                      Py_TYPE(pystr)->tp_name); | ||||||
|  |         return NULL; | ||||||
|  |     } | ||||||
|  |     return rval; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static void | static void | ||||||
| scanner_dealloc(PyObject *self) | scanner_dealloc(PyObject *self) | ||||||
| { | { | ||||||
|  | @ -1223,7 +1339,14 @@ encoder_init(PyObject *self, PyObject *args, PyObject *kwds) | ||||||
|     s->item_separator = item_separator; |     s->item_separator = item_separator; | ||||||
|     s->sort_keys = sort_keys; |     s->sort_keys = sort_keys; | ||||||
|     s->skipkeys = skipkeys; |     s->skipkeys = skipkeys; | ||||||
|     s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); |     s->fast_encode = NULL; | ||||||
|  |     if (PyCFunction_Check(s->encoder)) { | ||||||
|  |         PyCFunction f = PyCFunction_GetFunction(s->encoder); | ||||||
|  |         if (f == (PyCFunction)py_encode_basestring_ascii || | ||||||
|  |                 f == (PyCFunction)py_encode_basestring) { | ||||||
|  |             s->fast_encode = f; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|     s->allow_nan = PyObject_IsTrue(allow_nan); |     s->allow_nan = PyObject_IsTrue(allow_nan); | ||||||
| 
 | 
 | ||||||
|     Py_INCREF(s->markers); |     Py_INCREF(s->markers); | ||||||
|  | @ -1372,7 +1495,7 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) | ||||||
| { | { | ||||||
|     /* Return the JSON representation of a string */ |     /* Return the JSON representation of a string */ | ||||||
|     if (s->fast_encode) |     if (s->fast_encode) | ||||||
|         return py_encode_basestring_ascii(NULL, obj); |         return s->fast_encode(NULL, obj); | ||||||
|     else |     else | ||||||
|         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); |         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); | ||||||
| } | } | ||||||
|  | @ -1840,6 +1963,10 @@ static PyMethodDef speedups_methods[] = { | ||||||
|         (PyCFunction)py_encode_basestring_ascii, |         (PyCFunction)py_encode_basestring_ascii, | ||||||
|         METH_O, |         METH_O, | ||||||
|         pydoc_encode_basestring_ascii}, |         pydoc_encode_basestring_ascii}, | ||||||
|  |     {"encode_basestring", | ||||||
|  |         (PyCFunction)py_encode_basestring, | ||||||
|  |         METH_O, | ||||||
|  |         pydoc_encode_basestring}, | ||||||
|     {"scanstring", |     {"scanstring", | ||||||
|         (PyCFunction)py_scanstring, |         (PyCFunction)py_scanstring, | ||||||
|         METH_VARARGS, |         METH_VARARGS, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Antoine Pitrou
						Antoine Pitrou