mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	
		
			
	
	
		
			350 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			350 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | """Implementation of JSONDecoder
 | ||
|  | """
 | ||
|  | 
 | ||
|  | import re | ||
|  | import sys | ||
|  | 
 | ||
|  | from json.scanner import Scanner, pattern | ||
|  | try: | ||
|  |     from _json import scanstring as c_scanstring | ||
|  | except ImportError: | ||
|  |     c_scanstring = None | ||
|  | 
 | ||
|  | __all__ = ['JSONDecoder'] | ||
|  | 
 | ||
# Regular-expression flags shared by the patterns compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
|  | 
 | ||
|  | 
 | ||
|  | def _floatconstants(): | ||
|  |     import struct | ||
|  |     import sys | ||
|  |     _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') | ||
|  |     if sys.byteorder != 'big': | ||
|  |         _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] | ||
|  |     nan, inf = struct.unpack('dd', _BYTES) | ||
|  |     return nan, inf, -inf | ||
|  | 
 | ||
|  | NaN, PosInf, NegInf = _floatconstants() | ||
|  | 
 | ||
|  | 
 | ||
def linecol(doc, pos):
    """Translate the offset ``pos`` in ``doc`` into ``(lineno, colno)``.

    Both numbers are 1-based: the first character of every line,
    including the first line, is reported as column 1.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes ``pos``, which makes the
    # first line follow the same 1-based formula as every later line.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
|  | 
 | ||
|  | 
 | ||
def errmsg(msg, doc, pos, end=None):
    """Build an error message that locates ``pos`` (and ``end``) in ``doc``.

    With only ``pos`` the message names a single line/column/character
    position.  When ``end`` is given the message describes the span from
    ``pos`` to ``end``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
|  | 
 | ||
|  | 
 | ||
# Default Python value for each literal token matched by JSONConstant.
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
|  | 
 | ||
|  | 
 | ||
def JSONConstant(match, context, c=_CONSTANTS):
    """Convert a matched literal token into its Python value.

    If ``context`` has a non-None ``parse_constant`` attribute it is called
    with the matched text; otherwise the text is looked up in ``c``.
    Returns ``(value, None)``.
    """
    token = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    value = c[token] if hook is None else hook(token)
    return value, None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
|  | 
 | ||
|  | 
 | ||
def JSONNumber(match, context):
    """Convert a matched JSON number into a Python number.

    A number with a fraction or an exponent goes through
    ``context.parse_float`` (default ``float``); any other number goes
    through ``context.parse_int`` (default ``int``).  Returns
    ``(value, None)``.
    """
    # NOTE(review): JSONNumber.regex is presumably attached by pattern()
    # below -- confirm against json.scanner.
    start, stop = match.span()
    parts = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        convert = getattr(context, 'parse_int', None) or int
        return convert(integer), None
    convert = getattr(context, 'parse_float', None) or float
    text = integer + (frac or '') + (exp or '')
    return convert(text), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
|  | 
 | ||
|  | 
 | ||
# Matches a run of ordinary characters followed by the character that ends
# the run: a double quote, a backslash or a control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', flags=FLAGS)

# Replacement text for each single-character backslash escape.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding py_scanstring uses when the caller passes none.
DEFAULT_ENCODING = "utf-8"
|  | 
 | ||
|  | 
 | ||
def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the JSON string in ``s`` whose content starts at index ``end``.

    ``end`` is the index just after the opening quote (``end - 1`` is
    reported as the start of the string in error messages).  ``encoding``
    is used to decode non-unicode chunks and defaults to
    ``DEFAULT_ENCODING``.  When ``strict`` is true a control character
    inside the string raises ``ValueError``; otherwise it is kept as-is.
    ``_b`` and ``_m`` are pre-bound lookups and are not meant to be passed
    by callers.

    Returns ``(text, end)`` where ``text`` is the decoded unicode string and
    ``end`` is the index just after the closing quote.

    Raises ``ValueError`` for an unterminated string, an invalid escape, an
    invalid ``\\uXXXX`` escape or an invalid surrogate pair.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Text before the terminator is copied through, decoded if needed.
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            # Closing quote: the string is complete.
            break
        elif terminator != '\\':
            # The only other non-backslash terminator is a control character.
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        # A backslash was seen; the escape character must follow it.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Single-character escape such as \n or \".
            try:
                m = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # \uXXXX escape, possibly the first half of a surrogate pair.
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                # Surrogate pairs are only combined on builds whose unichr()
                # can represent code points above 0xFFFF.
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # The second escape must be a low surrogate; anything
                    # else would be combined into an unrelated code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
|  | 
 | ||
|  | 
 | ||
# Use the C implementation when the _json import provided one; otherwise
# fall back to the pure-Python scanner.
scanstring = py_scanstring if c_scanstring is None else c_scanstring
|  | 
 | ||
def JSONString(match, context):
    """Scan the JSON string whose opening quote ``match`` matched.

    ``encoding`` (default None) and ``strict`` (default True) are read from
    ``context`` and passed to ``scanstring``, whose result is returned.
    """
    options = (
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True),
    )
    return scanstring(match.string, match.end(), *options)
pattern(r'"')(JSONString)
|  | 
 | ||
|  | 
 | ||
# Matches a (possibly empty) run of whitespace; used to skip between tokens.
WHITESPACE = re.compile(r'\s*', flags=FLAGS)
|  | 
 | ||
|  | 
 | ||
def JSONObject(match, context, _w=WHITESPACE.match):
    """Parse the JSON object whose opening brace ``match`` matched.

    Property names are read with ``scanstring`` and values with
    ``JSONScanner.iterscan``.  If ``context`` has a non-None
    ``object_hook`` it is applied to the finished dict; the empty object
    ``{}`` is returned directly without calling the hook.

    Returns ``(obj, end)`` where ``end`` is the index just after the
    closing brace.  Raises ``ValueError`` on malformed input.
    """
    s = match.string
    result = {}
    pos = _w(s, match.end()).end()
    lookahead = s[pos:pos + 1]
    # Trivial empty object
    if lookahead == '}':
        return result, pos + 1
    if lookahead != '"':
        raise ValueError(errmsg("Expecting property name", s, pos))
    pos += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    scan = JSONScanner.iterscan
    while True:
        # ``pos`` points just after the opening quote of the property name.
        key, pos = scanstring(s, pos, encoding, strict)
        pos = _w(s, pos).end()
        if s[pos:pos + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, pos))
        pos = _w(s, pos + 1).end()
        try:
            value, pos = scan(s, idx=pos, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, pos))
        result[key] = value
        pos = _w(s, pos).end()
        separator = s[pos:pos + 1]
        if separator == '}':
            pos += 1
            break
        if separator != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, pos))
        # Skip the comma and whitespace; the next name must start here.
        pos = _w(s, pos + 1).end()
        if s[pos:pos + 1] != '"':
            raise ValueError(errmsg("Expecting property name", s, pos))
        pos += 1
    hook = getattr(context, 'object_hook', None)
    if hook is not None:
        result = hook(result)
    return result, pos
pattern(r'{')(JSONObject)
|  | 
 | ||
|  | 
 | ||
def JSONArray(match, context, _w=WHITESPACE.match):
    """Parse the JSON array whose opening bracket ``match`` matched.

    Elements are read with ``JSONScanner.iterscan``.  Returns
    ``(values, end)`` where ``values`` is a list and ``end`` is the index
    just after the closing bracket.

    Raises ``ValueError`` when an element cannot be scanned or when an
    element is not followed by ``,`` or ``]``.  The delimiter error points
    at the offending character, as JSONObject's does.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    if s[end:end + 1] == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        if nextchar == ']':
            end += 1
            break
        if nextchar != ',':
            # ``end`` still indexes the unexpected character here.
            raise ValueError(errmsg("Expecting , delimiter", s, end))
        # Skip the comma and any whitespace before the next element.
        end = _w(s, end + 1).end()
    return values, end
pattern(r'\[')(JSONArray)
|  | 
 | ||
|  | 
 | ||
# Every JSON value parser defined above, in the order handed to Scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Scanner used by JSONObject and JSONArray to parse nested values.
JSONScanner = Scanner(ANYTHING)
|  | 
 | ||
|  | 
 | ||
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Store the decoding options on the instance.

        ``encoding`` determines the encoding used to interpret any ``str``
        objects decoded by this instance (utf-8 by default).  It has no
        effect when decoding ``unicode`` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as ``unicode``.

        ``object_hook``, if specified, will be called with the result of
        every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded. By default this is equivalent to
        float(num_str). This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded. By default this is equivalent to
        int(num_str). This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN, null, true, false.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        ``strict`` (default True) is stored unchanged and handed to the
        string scanner by the parsers in this module.
        """
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_constant = parse_constant
        self.parse_int = parse_int
        self.parse_float = parse_float

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        Leading and trailing whitespace is skipped; anything else after
        the document raises ``ValueError``.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Extra keyword arguments are forwarded to the scanner's
        ``iterscan``; ``context`` defaults to this decoder.
        """
        if 'context' not in kw:
            kw['context'] = self
        try:
            result = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = result
        return obj, end