mirror of
				https://github.com/python/cpython.git
				synced 2025-10-24 18:33:49 +00:00 
			
		
		
		
	 bcf6f92dc5
			
		
	
	
		bcf6f92dc5
		
	
	
	
	
		
			
			* Fix an error where True/False were being written out as title-cased strings when used as dictionary keys. * Speed up iteration over dicts by looping over items() rather than keys() followed by value lookups. * TODO: sort only by keys, not keys and values.
		
			
				
	
	
		
			421 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			421 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """Implementation of JSONEncoder
 | |
| """
 | |
| import re
 | |
| 
 | |
| try:
 | |
|     from _json import encode_basestring_ascii as c_encode_basestring_ascii
 | |
| except ImportError:
 | |
|     c_encode_basestring_ascii = None
 | |
| try:
 | |
|     from _json import make_encoder as c_make_encoder
 | |
| except ImportError:
 | |
|     c_make_encoder = None
 | |
| 
 | |
# Matches every character that must be escaped inside a JSON string when
# non-ASCII output is allowed: the control characters U+0000..U+001F, the
# backslash and the double quote.  (\b, \f, \n, \r and \t are already part
# of the \x00-\x1f range; they are listed explicitly for readability.)
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches the backslash, the double quote, and any character outside the
# printable ASCII range ' ' .. '~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Bytes pattern matching any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are
# listed literally; the loop below fills in \uXXXX for the remaining
# control characters.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes (\n, \t, ...) defined above.
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# float('inf') is the documented spelling of positive infinity; it does not
# depend on an out-of-range literal overflowing to infinity while parsing.
INFINITY = float('inf')
# Callable used to render finite floats.
FLOAT_REPR = repr
 | |
| 
 | |
def encode_basestring(s):
    """Return the string ``s`` as a double-quoted JSON string literal.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; all other characters are copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda hit: ESCAPE_DCT[hit.group(0)], s)
    return '"' + escaped + '"'
 | |
| 
 | |
| 
 | |
def py_encode_basestring_ascii(s):
    """Return the string ``s`` as an ASCII-only, double-quoted JSON string.

    Characters matched by ``ESCAPE_ASCII`` are replaced either by their
    entry in ``ESCAPE_DCT`` or by ``\\uXXXX`` escapes; code points at or
    above U+10000 are written as a pair of surrogate escapes.

    """
    def escape_char(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Outside the BMP: split into a high/low surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'
 | |
| 
 | |
| 
 | |
# Use the implementation imported from ``_json`` when that import succeeded
# (the name is None otherwise); fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
 | |
| 
 | |
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when the
    # ``separators`` argument is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float, bool or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, strings are written with
        ``encode_basestring_ascii`` so the output contains only ASCII
        characters, with everything else escaped.  If ensure_ascii is
        false, strings are written with ``encode_basestring`` and
        non-ASCII characters are passed through as-is.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding
        and a ValueError is raised when one is found, instead of
        recursing without bound.  Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method with the caller's function on
            # this instance only.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; when it is
        true and no indentation was requested, the ``_json`` encoder is
        used if it could be imported.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                # NaN is the only float that compares unequal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Only take the one-shot path when no indentation was requested at
        # all.  Testing ``self.indent is None`` (rather than its
        # truthiness) keeps indent=0, documented as "only insert
        # newlines", on the pretty-printing path below.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
 | |
| 
 | |
| def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
 | |
|         _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
 | |
|         ## HACK: hand-optimized bytecode; turn globals into locals
 | |
|         ValueError=ValueError,
 | |
|         dict=dict,
 | |
|         float=float,
 | |
|         id=id,
 | |
|         int=int,
 | |
|         isinstance=isinstance,
 | |
|         list=list,
 | |
|         str=str,
 | |
|         tuple=tuple,
 | |
|     ):
 | |
| 
 | |
|     def _iterencode_list(lst, _current_indent_level):
 | |
|         if not lst:
 | |
|             yield '[]'
 | |
|             return
 | |
|         if markers is not None:
 | |
|             markerid = id(lst)
 | |
|             if markerid in markers:
 | |
|                 raise ValueError("Circular reference detected")
 | |
|             markers[markerid] = lst
 | |
|         buf = '['
 | |
|         if _indent is not None:
 | |
|             _current_indent_level += 1
 | |
|             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
 | |
|             separator = _item_separator + newline_indent
 | |
|             buf += newline_indent
 | |
|         else:
 | |
|             newline_indent = None
 | |
|             separator = _item_separator
 | |
|         first = True
 | |
|         for value in lst:
 | |
|             if first:
 | |
|                 first = False
 | |
|             else:
 | |
|                 buf = separator
 | |
|             if isinstance(value, str):
 | |
|                 yield buf + _encoder(value)
 | |
|             elif value is None:
 | |
|                 yield buf + 'null'
 | |
|             elif value is True:
 | |
|                 yield buf + 'true'
 | |
|             elif value is False:
 | |
|                 yield buf + 'false'
 | |
|             elif isinstance(value, int):
 | |
|                 yield buf + str(value)
 | |
|             elif isinstance(value, float):
 | |
|                 yield buf + _floatstr(value)
 | |
|             else:
 | |
|                 yield buf
 | |
|                 if isinstance(value, (list, tuple)):
 | |
|                     chunks = _iterencode_list(value, _current_indent_level)
 | |
|                 elif isinstance(value, dict):
 | |
|                     chunks = _iterencode_dict(value, _current_indent_level)
 | |
|                 else:
 | |
|                     chunks = _iterencode(value, _current_indent_level)
 | |
|                 for chunk in chunks:
 | |
|                     yield chunk
 | |
|         if newline_indent is not None:
 | |
|             _current_indent_level -= 1
 | |
|             yield '\n' + (' ' * (_indent * _current_indent_level))
 | |
|         yield ']'
 | |
|         if markers is not None:
 | |
|             del markers[markerid]
 | |
| 
 | |
|     def _iterencode_dict(dct, _current_indent_level):
 | |
|         if not dct:
 | |
|             yield '{}'
 | |
|             return
 | |
|         if markers is not None:
 | |
|             markerid = id(dct)
 | |
|             if markerid in markers:
 | |
|                 raise ValueError("Circular reference detected")
 | |
|             markers[markerid] = dct
 | |
|         yield '{'
 | |
|         if _indent is not None:
 | |
|             _current_indent_level += 1
 | |
|             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
 | |
|             item_separator = _item_separator + newline_indent
 | |
|             yield newline_indent
 | |
|         else:
 | |
|             newline_indent = None
 | |
|             item_separator = _item_separator
 | |
|         first = True
 | |
|         if _sort_keys:
 | |
|             items = sorted(dct.items(), key=lambda kv: kv[0])
 | |
|         else:
 | |
|             items = dct.items()
 | |
|         for key, value in items:
 | |
|             if isinstance(key, str):
 | |
|                 pass
 | |
|             # JavaScript is weakly typed for these, so it makes sense to
 | |
|             # also allow them.  Many encoders seem to do something like this.
 | |
|             elif isinstance(key, float):
 | |
|                 key = _floatstr(key)
 | |
|             elif key is True:
 | |
|                 key = 'true'
 | |
|             elif key is False:
 | |
|                 key = 'false'
 | |
|             elif key is None:
 | |
|                 key = 'null'
 | |
|             elif isinstance(key, int):
 | |
|                 key = str(key)
 | |
|             elif _skipkeys:
 | |
|                 continue
 | |
|             else:
 | |
|                 raise TypeError("key " + repr(key) + " is not a string")
 | |
|             if first:
 | |
|                 first = False
 | |
|             else:
 | |
|                 yield item_separator
 | |
|             yield _encoder(key)
 | |
|             yield _key_separator
 | |
|             if isinstance(value, str):
 | |
|                 yield _encoder(value)
 | |
|             elif value is None:
 | |
|                 yield 'null'
 | |
|             elif value is True:
 | |
|                 yield 'true'
 | |
|             elif value is False:
 | |
|                 yield 'false'
 | |
|             elif isinstance(value, int):
 | |
|                 yield str(value)
 | |
|             elif isinstance(value, float):
 | |
|                 yield _floatstr(value)
 | |
|             else:
 | |
|                 if isinstance(value, (list, tuple)):
 | |
|                     chunks = _iterencode_list(value, _current_indent_level)
 | |
|                 elif isinstance(value, dict):
 | |
|                     chunks = _iterencode_dict(value, _current_indent_level)
 | |
|                 else:
 | |
|                     chunks = _iterencode(value, _current_indent_level)
 | |
|                 for chunk in chunks:
 | |
|                     yield chunk
 | |
|         if newline_indent is not None:
 | |
|             _current_indent_level -= 1
 | |
|             yield '\n' + (' ' * (_indent * _current_indent_level))
 | |
|         yield '}'
 | |
|         if markers is not None:
 | |
|             del markers[markerid]
 | |
| 
 | |
|     def _iterencode(o, _current_indent_level):
 | |
|         if isinstance(o, str):
 | |
|             yield _encoder(o)
 | |
|         elif o is None:
 | |
|             yield 'null'
 | |
|         elif o is True:
 | |
|             yield 'true'
 | |
|         elif o is False:
 | |
|             yield 'false'
 | |
|         elif isinstance(o, (int, int)):
 | |
|             yield str(o)
 | |
|         elif isinstance(o, float):
 | |
|             yield _floatstr(o)
 | |
|         elif isinstance(o, (list, tuple)):
 | |
|             for chunk in _iterencode_list(o, _current_indent_level):
 | |
|                 yield chunk
 | |
|         elif isinstance(o, dict):
 | |
|             for chunk in _iterencode_dict(o, _current_indent_level):
 | |
|                 yield chunk
 | |
|         else:
 | |
|             if markers is not None:
 | |
|                 markerid = id(o)
 | |
|                 if markerid in markers:
 | |
|                     raise ValueError("Circular reference detected")
 | |
|                 markers[markerid] = o
 | |
|             o = _default(o)
 | |
|             for chunk in _iterencode(o, _current_indent_level):
 | |
|                 yield chunk
 | |
|             if markers is not None:
 | |
|                 del markers[markerid]
 | |
|     return _iterencode
 |