mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	gh-122163: Add notes for JSON serialization errors (GH-122165)
This makes it possible to identify the source of the error.
This commit is contained in:
		
							parent
							
								
									c908d1f87d
								
							
						
					
					
						commit
						e6b25e9a09
					
				
					 8 changed files with 135 additions and 66 deletions
				
			
		|  | @ -112,6 +112,13 @@ Added support for converting any objects that have the | ||||||
| :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. | :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. | ||||||
| (Contributed by Serhiy Storchaka in :gh:`82017`.) | (Contributed by Serhiy Storchaka in :gh:`82017`.) | ||||||
| 
 | 
 | ||||||
|  | json | ||||||
|  | ---- | ||||||
|  | 
 | ||||||
|  | Add notes for JSON serialization errors that allow identifying the source | ||||||
|  | of the error. | ||||||
|  | (Contributed by Serhiy Storchaka in :gh:`122163`.) | ||||||
|  | 
 | ||||||
| os | os | ||||||
| -- | -- | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -161,7 +161,8 @@ extern PyObject* _Py_Offer_Suggestions(PyObject* exception); | ||||||
| PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b, | PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b, | ||||||
|                                           Py_ssize_t max_cost); |                                           Py_ssize_t max_cost); | ||||||
| 
 | 
 | ||||||
| void _PyErr_FormatNote(const char *format, ...); | // Export for '_json' shared extension
 | ||||||
|  | PyAPI_FUNC(void) _PyErr_FormatNote(const char *format, ...); | ||||||
| 
 | 
 | ||||||
| /* Context manipulation (PEP 3134) */ | /* Context manipulation (PEP 3134) */ | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -293,37 +293,40 @@ def _iterencode_list(lst, _current_indent_level): | ||||||
|         else: |         else: | ||||||
|             newline_indent = None |             newline_indent = None | ||||||
|             separator = _item_separator |             separator = _item_separator | ||||||
|         first = True |         for i, value in enumerate(lst): | ||||||
|         for value in lst: |             if i: | ||||||
|             if first: |  | ||||||
|                 first = False |  | ||||||
|             else: |  | ||||||
|                 buf = separator |                 buf = separator | ||||||
|             if isinstance(value, str): |             try: | ||||||
|                 yield buf + _encoder(value) |                 if isinstance(value, str): | ||||||
|             elif value is None: |                     yield buf + _encoder(value) | ||||||
|                 yield buf + 'null' |                 elif value is None: | ||||||
|             elif value is True: |                     yield buf + 'null' | ||||||
|                 yield buf + 'true' |                 elif value is True: | ||||||
|             elif value is False: |                     yield buf + 'true' | ||||||
|                 yield buf + 'false' |                 elif value is False: | ||||||
|             elif isinstance(value, int): |                     yield buf + 'false' | ||||||
|                 # Subclasses of int/float may override __repr__, but we still |                 elif isinstance(value, int): | ||||||
|                 # want to encode them as integers/floats in JSON. One example |                     # Subclasses of int/float may override __repr__, but we still | ||||||
|                 # within the standard library is IntEnum. |                     # want to encode them as integers/floats in JSON. One example | ||||||
|                 yield buf + _intstr(value) |                     # within the standard library is IntEnum. | ||||||
|             elif isinstance(value, float): |                     yield buf + _intstr(value) | ||||||
|                 # see comment above for int |                 elif isinstance(value, float): | ||||||
|                 yield buf + _floatstr(value) |                     # see comment above for int | ||||||
|             else: |                     yield buf + _floatstr(value) | ||||||
|                 yield buf |  | ||||||
|                 if isinstance(value, (list, tuple)): |  | ||||||
|                     chunks = _iterencode_list(value, _current_indent_level) |  | ||||||
|                 elif isinstance(value, dict): |  | ||||||
|                     chunks = _iterencode_dict(value, _current_indent_level) |  | ||||||
|                 else: |                 else: | ||||||
|                     chunks = _iterencode(value, _current_indent_level) |                     yield buf | ||||||
|                 yield from chunks |                     if isinstance(value, (list, tuple)): | ||||||
|  |                         chunks = _iterencode_list(value, _current_indent_level) | ||||||
|  |                     elif isinstance(value, dict): | ||||||
|  |                         chunks = _iterencode_dict(value, _current_indent_level) | ||||||
|  |                     else: | ||||||
|  |                         chunks = _iterencode(value, _current_indent_level) | ||||||
|  |                     yield from chunks | ||||||
|  |             except GeneratorExit: | ||||||
|  |                 raise | ||||||
|  |             except BaseException as exc: | ||||||
|  |                 exc.add_note(f'when serializing {type(lst).__name__} item {i}') | ||||||
|  |                 raise | ||||||
|         if newline_indent is not None: |         if newline_indent is not None: | ||||||
|             _current_indent_level -= 1 |             _current_indent_level -= 1 | ||||||
|             yield '\n' + _indent * _current_indent_level |             yield '\n' + _indent * _current_indent_level | ||||||
|  | @ -382,28 +385,34 @@ def _iterencode_dict(dct, _current_indent_level): | ||||||
|                 yield item_separator |                 yield item_separator | ||||||
|             yield _encoder(key) |             yield _encoder(key) | ||||||
|             yield _key_separator |             yield _key_separator | ||||||
|             if isinstance(value, str): |             try: | ||||||
|                 yield _encoder(value) |                 if isinstance(value, str): | ||||||
|             elif value is None: |                     yield _encoder(value) | ||||||
|                 yield 'null' |                 elif value is None: | ||||||
|             elif value is True: |                     yield 'null' | ||||||
|                 yield 'true' |                 elif value is True: | ||||||
|             elif value is False: |                     yield 'true' | ||||||
|                 yield 'false' |                 elif value is False: | ||||||
|             elif isinstance(value, int): |                     yield 'false' | ||||||
|                 # see comment for int/float in _make_iterencode |                 elif isinstance(value, int): | ||||||
|                 yield _intstr(value) |                     # see comment for int/float in _make_iterencode | ||||||
|             elif isinstance(value, float): |                     yield _intstr(value) | ||||||
|                 # see comment for int/float in _make_iterencode |                 elif isinstance(value, float): | ||||||
|                 yield _floatstr(value) |                     # see comment for int/float in _make_iterencode | ||||||
|             else: |                     yield _floatstr(value) | ||||||
|                 if isinstance(value, (list, tuple)): |  | ||||||
|                     chunks = _iterencode_list(value, _current_indent_level) |  | ||||||
|                 elif isinstance(value, dict): |  | ||||||
|                     chunks = _iterencode_dict(value, _current_indent_level) |  | ||||||
|                 else: |                 else: | ||||||
|                     chunks = _iterencode(value, _current_indent_level) |                     if isinstance(value, (list, tuple)): | ||||||
|                 yield from chunks |                         chunks = _iterencode_list(value, _current_indent_level) | ||||||
|  |                     elif isinstance(value, dict): | ||||||
|  |                         chunks = _iterencode_dict(value, _current_indent_level) | ||||||
|  |                     else: | ||||||
|  |                         chunks = _iterencode(value, _current_indent_level) | ||||||
|  |                     yield from chunks | ||||||
|  |             except GeneratorExit: | ||||||
|  |                 raise | ||||||
|  |             except BaseException as exc: | ||||||
|  |                 exc.add_note(f'when serializing {type(dct).__name__} item {key!r}') | ||||||
|  |                 raise | ||||||
|         if newline_indent is not None: |         if newline_indent is not None: | ||||||
|             _current_indent_level -= 1 |             _current_indent_level -= 1 | ||||||
|             yield '\n' + _indent * _current_indent_level |             yield '\n' + _indent * _current_indent_level | ||||||
|  | @ -436,8 +445,14 @@ def _iterencode(o, _current_indent_level): | ||||||
|                 if markerid in markers: |                 if markerid in markers: | ||||||
|                     raise ValueError("Circular reference detected") |                     raise ValueError("Circular reference detected") | ||||||
|                 markers[markerid] = o |                 markers[markerid] = o | ||||||
|             o = _default(o) |             newobj = _default(o) | ||||||
|             yield from _iterencode(o, _current_indent_level) |             try: | ||||||
|  |                 yield from _iterencode(newobj, _current_indent_level) | ||||||
|  |             except GeneratorExit: | ||||||
|  |                 raise | ||||||
|  |             except BaseException as exc: | ||||||
|  |                 exc.add_note(f'when serializing {type(o).__name__} object') | ||||||
|  |                 raise | ||||||
|             if markers is not None: |             if markers is not None: | ||||||
|                 del markers[markerid] |                 del markers[markerid] | ||||||
|     return _iterencode |     return _iterencode | ||||||
|  |  | ||||||
|  | @ -8,6 +8,24 @@ def test_default(self): | ||||||
|             self.dumps(type, default=repr), |             self.dumps(type, default=repr), | ||||||
|             self.dumps(repr(type))) |             self.dumps(repr(type))) | ||||||
| 
 | 
 | ||||||
|  |     def test_bad_default(self): | ||||||
|  |         def default(obj): | ||||||
|  |             if obj is NotImplemented: | ||||||
|  |                 raise ValueError | ||||||
|  |             if obj is ...: | ||||||
|  |                 return NotImplemented | ||||||
|  |             if obj is type: | ||||||
|  |                 return collections | ||||||
|  |             return [...] | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(ValueError) as cm: | ||||||
|  |             self.dumps(type, default=default) | ||||||
|  |         self.assertEqual(cm.exception.__notes__, | ||||||
|  |                          ['when serializing ellipsis object', | ||||||
|  |                           'when serializing list item 0', | ||||||
|  |                           'when serializing module object', | ||||||
|  |                           'when serializing type object']) | ||||||
|  | 
 | ||||||
|     def test_ordereddict(self): |     def test_ordereddict(self): | ||||||
|         od = collections.OrderedDict(a=1, b=2, c=3, d=4) |         od = collections.OrderedDict(a=1, b=2, c=3, d=4) | ||||||
|         od.move_to_end('b') |         od.move_to_end('b') | ||||||
|  |  | ||||||
|  | @ -100,8 +100,27 @@ def test_non_string_keys_dict(self): | ||||||
|     def test_not_serializable(self): |     def test_not_serializable(self): | ||||||
|         import sys |         import sys | ||||||
|         with self.assertRaisesRegex(TypeError, |         with self.assertRaisesRegex(TypeError, | ||||||
|                 'Object of type module is not JSON serializable'): |                 'Object of type module is not JSON serializable') as cm: | ||||||
|             self.dumps(sys) |             self.dumps(sys) | ||||||
|  |         self.assertFalse(hasattr(cm.exception, '__notes__')) | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(TypeError) as cm: | ||||||
|  |             self.dumps([1, [2, 3, sys]]) | ||||||
|  |         self.assertEqual(cm.exception.__notes__, | ||||||
|  |                          ['when serializing list item 2', | ||||||
|  |                           'when serializing list item 1']) | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(TypeError) as cm: | ||||||
|  |             self.dumps((1, (2, 3, sys))) | ||||||
|  |         self.assertEqual(cm.exception.__notes__, | ||||||
|  |                          ['when serializing tuple item 2', | ||||||
|  |                           'when serializing tuple item 1']) | ||||||
|  | 
 | ||||||
|  |         with self.assertRaises(TypeError) as cm: | ||||||
|  |             self.dumps({'a': {'b': sys}}) | ||||||
|  |         self.assertEqual(cm.exception.__notes__, | ||||||
|  |                          ["when serializing dict item 'b'", | ||||||
|  |                           "when serializing dict item 'a'"]) | ||||||
| 
 | 
 | ||||||
|     def test_truncated_input(self): |     def test_truncated_input(self): | ||||||
|         test_cases = [ |         test_cases = [ | ||||||
|  |  | ||||||
|  | @ -12,8 +12,8 @@ def test_listrecursion(self): | ||||||
|         x.append(x) |         x.append(x) | ||||||
|         try: |         try: | ||||||
|             self.dumps(x) |             self.dumps(x) | ||||||
|         except ValueError: |         except ValueError as exc: | ||||||
|             pass |             self.assertEqual(exc.__notes__, ["when serializing list item 0"]) | ||||||
|         else: |         else: | ||||||
|             self.fail("didn't raise ValueError on list recursion") |             self.fail("didn't raise ValueError on list recursion") | ||||||
|         x = [] |         x = [] | ||||||
|  | @ -21,8 +21,8 @@ def test_listrecursion(self): | ||||||
|         x.append(y) |         x.append(y) | ||||||
|         try: |         try: | ||||||
|             self.dumps(x) |             self.dumps(x) | ||||||
|         except ValueError: |         except ValueError as exc: | ||||||
|             pass |             self.assertEqual(exc.__notes__, ["when serializing list item 0"]*2) | ||||||
|         else: |         else: | ||||||
|             self.fail("didn't raise ValueError on alternating list recursion") |             self.fail("didn't raise ValueError on alternating list recursion") | ||||||
|         y = [] |         y = [] | ||||||
|  | @ -35,8 +35,8 @@ def test_dictrecursion(self): | ||||||
|         x["test"] = x |         x["test"] = x | ||||||
|         try: |         try: | ||||||
|             self.dumps(x) |             self.dumps(x) | ||||||
|         except ValueError: |         except ValueError as exc: | ||||||
|             pass |             self.assertEqual(exc.__notes__, ["when serializing dict item 'test'"]) | ||||||
|         else: |         else: | ||||||
|             self.fail("didn't raise ValueError on dict recursion") |             self.fail("didn't raise ValueError on dict recursion") | ||||||
|         x = {} |         x = {} | ||||||
|  | @ -60,8 +60,10 @@ def default(self, o): | ||||||
|         enc.recurse = True |         enc.recurse = True | ||||||
|         try: |         try: | ||||||
|             enc.encode(JSONTestObject) |             enc.encode(JSONTestObject) | ||||||
|         except ValueError: |         except ValueError as exc: | ||||||
|             pass |             self.assertEqual(exc.__notes__, | ||||||
|  |                              ["when serializing list item 0", | ||||||
|  |                               "when serializing type object"]) | ||||||
|         else: |         else: | ||||||
|             self.fail("didn't raise ValueError on default recursion") |             self.fail("didn't raise ValueError on default recursion") | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | Add notes for JSON serialization errors that allow identifying the source of | ||||||
|  | the error. | ||||||
|  | @ -11,6 +11,7 @@ | ||||||
| #include "Python.h" | #include "Python.h" | ||||||
| #include "pycore_ceval.h"           // _Py_EnterRecursiveCall() | #include "pycore_ceval.h"           // _Py_EnterRecursiveCall() | ||||||
| #include "pycore_runtime.h"         // _PyRuntime | #include "pycore_runtime.h"         // _PyRuntime | ||||||
|  | #include "pycore_pyerrors.h"        // _PyErr_FormatNote | ||||||
| 
 | 
 | ||||||
| #include "pycore_global_strings.h"  // _Py_ID() | #include "pycore_global_strings.h"  // _Py_ID() | ||||||
| #include <stdbool.h>                // bool | #include <stdbool.h>                // bool | ||||||
|  | @ -1461,6 +1462,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||
| 
 | 
 | ||||||
|         Py_DECREF(newobj); |         Py_DECREF(newobj); | ||||||
|         if (rv) { |         if (rv) { | ||||||
|  |             _PyErr_FormatNote("when serializing %T object", obj); | ||||||
|             Py_XDECREF(ident); |             Py_XDECREF(ident); | ||||||
|             return -1; |             return -1; | ||||||
|         } |         } | ||||||
|  | @ -1477,7 +1479,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||
| 
 | 
 | ||||||
| static int | static int | ||||||
| encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, | encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, | ||||||
|                          PyObject *key, PyObject *value, |                          PyObject *dct, PyObject *key, PyObject *value, | ||||||
|                          PyObject *newline_indent, |                          PyObject *newline_indent, | ||||||
|                          PyObject *item_separator) |                          PyObject *item_separator) | ||||||
| { | { | ||||||
|  | @ -1535,6 +1537,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
|     if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { |     if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { | ||||||
|  |         _PyErr_FormatNote("when serializing %T item %R", dct, key); | ||||||
|         return -1; |         return -1; | ||||||
|     } |     } | ||||||
|     return 0; |     return 0; | ||||||
|  | @ -1606,7 +1609,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||
| 
 | 
 | ||||||
|             key = PyTuple_GET_ITEM(item, 0); |             key = PyTuple_GET_ITEM(item, 0); | ||||||
|             value = PyTuple_GET_ITEM(item, 1); |             value = PyTuple_GET_ITEM(item, 1); | ||||||
|             if (encoder_encode_key_value(s, writer, &first, key, value, |             if (encoder_encode_key_value(s, writer, &first, dct, key, value, | ||||||
|                                          new_newline_indent, |                                          new_newline_indent, | ||||||
|                                          current_item_separator) < 0) |                                          current_item_separator) < 0) | ||||||
|                 goto bail; |                 goto bail; | ||||||
|  | @ -1616,7 +1619,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||
|     } else { |     } else { | ||||||
|         Py_ssize_t pos = 0; |         Py_ssize_t pos = 0; | ||||||
|         while (PyDict_Next(dct, &pos, &key, &value)) { |         while (PyDict_Next(dct, &pos, &key, &value)) { | ||||||
|             if (encoder_encode_key_value(s, writer, &first, key, value, |             if (encoder_encode_key_value(s, writer, &first, dct, key, value, | ||||||
|                                          new_newline_indent, |                                          new_newline_indent, | ||||||
|                                          current_item_separator) < 0) |                                          current_item_separator) < 0) | ||||||
|                 goto bail; |                 goto bail; | ||||||
|  | @ -1710,8 +1713,10 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, | ||||||
|             if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) |             if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) | ||||||
|                 goto bail; |                 goto bail; | ||||||
|         } |         } | ||||||
|         if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) |         if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { | ||||||
|  |             _PyErr_FormatNote("when serializing %T item %zd", seq, i); | ||||||
|             goto bail; |             goto bail; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     if (ident != NULL) { |     if (ident != NULL) { | ||||||
|         if (PyDict_DelItem(s->markers, ident)) |         if (PyDict_DelItem(s->markers, ident)) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Serhiy Storchaka
						Serhiy Storchaka