| 
									
										
										
										
											2008-05-05 20:21:38 +00:00
										 |  |  | """Implementation of JSONDecoder
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from json.scanner import Scanner, pattern | 
					
						
							|  |  |  | try: | 
					
						
							|  |  |  |     from _json import scanstring as c_scanstring | 
					
						
							|  |  |  | except ImportError: | 
					
						
							|  |  |  |     c_scanstring = None | 
					
						
							|  |  |  | 
 | 
					
						
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']

# Regex flags shared by the patterns compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

# Float values backing the 'NaN', 'Infinity' and '-Infinity' entries of
# _CONSTANTS.
NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
					
						
							| 
									
										
										
										
											2008-05-05 20:21:38 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def linecol(doc, pos): | 
					
						
							|  |  |  |     lineno = doc.count('\n', 0, pos) + 1 | 
					
						
							|  |  |  |     if lineno == 1: | 
					
						
							|  |  |  |         colno = pos | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         colno = pos - doc.rindex('\n', 0, pos) | 
					
						
							|  |  |  |     return lineno, colno | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def errmsg(msg, doc, pos, end=None): | 
					
						
							|  |  |  |     lineno, colno = linecol(doc, pos) | 
					
						
							|  |  |  |     if end is None: | 
					
						
							|  |  |  |         fmt = '{0}: line {1} column {2} (char {3})' | 
					
						
							|  |  |  |         return fmt.format(msg, lineno, colno, pos) | 
					
						
							|  |  |  |     endlineno, endcolno = linecol(doc, end) | 
					
						
							|  |  |  |     fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' | 
					
						
							|  |  |  |     return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Default Python value for each constant literal recognised by
# JSONConstant; used when the decoding context has no ``parse_constant``.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
    'true': True,
    'false': False,
    'null': None,
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def JSONConstant(match, context, c=_CONSTANTS): | 
					
						
							|  |  |  |     s = match.group(0) | 
					
						
							|  |  |  |     fn = getattr(context, 'parse_constant', None) | 
					
						
							|  |  |  |     if fn is None: | 
					
						
							|  |  |  |         rval = c[s] | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         rval = fn(s) | 
					
						
							|  |  |  |     return rval, None | 
					
						
							|  |  |  | pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def JSONNumber(match, context): | 
					
						
							|  |  |  |     match = JSONNumber.regex.match(match.string, *match.span()) | 
					
						
							|  |  |  |     integer, frac, exp = match.groups() | 
					
						
							|  |  |  |     if frac or exp: | 
					
						
							|  |  |  |         fn = getattr(context, 'parse_float', None) or float | 
					
						
							|  |  |  |         res = fn(integer + (frac or '') + (exp or '')) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         fn = getattr(context, 'parse_int', None) or int | 
					
						
							|  |  |  |         res = fn(integer) | 
					
						
							|  |  |  |     return res, None | 
					
						
							|  |  |  | pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Matches a (possibly empty) run of string content in group 1, followed in
# group 2 by the first character that needs special handling: a double
# quote, a backslash, or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Maps the character following a backslash to the character it stands for.
# ``\u`` escapes are not listed here; py_scanstring handles them separately.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring uses when it is called with ``encoding=None``.
DEFAULT_ENCODING = "utf-8"
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): | 
					
						
							|  |  |  |     if encoding is None: | 
					
						
							|  |  |  |         encoding = DEFAULT_ENCODING | 
					
						
							|  |  |  |     chunks = [] | 
					
						
							|  |  |  |     _append = chunks.append | 
					
						
							|  |  |  |     begin = end - 1 | 
					
						
							|  |  |  |     while 1: | 
					
						
							|  |  |  |         chunk = _m(s, end) | 
					
						
							|  |  |  |         if chunk is None: | 
					
						
							|  |  |  |             raise ValueError( | 
					
						
							|  |  |  |                 errmsg("Unterminated string starting at", s, begin)) | 
					
						
							|  |  |  |         end = chunk.end() | 
					
						
							|  |  |  |         content, terminator = chunk.groups() | 
					
						
							|  |  |  |         if content: | 
					
						
							|  |  |  |             if not isinstance(content, unicode): | 
					
						
							|  |  |  |                 content = unicode(content, encoding) | 
					
						
							|  |  |  |             _append(content) | 
					
						
							|  |  |  |         if terminator == '"': | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         elif terminator != '\\': | 
					
						
							|  |  |  |             if strict: | 
					
						
							|  |  |  |                 msg = "Invalid control character {0!r} at".format(terminator) | 
					
						
							|  |  |  |                 raise ValueError(errmsg(msg, s, end)) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 _append(terminator) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             esc = s[end] | 
					
						
							|  |  |  |         except IndexError: | 
					
						
							|  |  |  |             raise ValueError( | 
					
						
							|  |  |  |                 errmsg("Unterminated string starting at", s, begin)) | 
					
						
							|  |  |  |         if esc != 'u': | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 m = _b[esc] | 
					
						
							|  |  |  |             except KeyError: | 
					
						
							|  |  |  |                 msg = "Invalid \\escape: {0!r}".format(esc) | 
					
						
							|  |  |  |                 raise ValueError(errmsg(msg, s, end)) | 
					
						
							|  |  |  |             end += 1 | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             esc = s[end + 1:end + 5] | 
					
						
							|  |  |  |             next_end = end + 5 | 
					
						
							|  |  |  |             msg = "Invalid \\uXXXX escape" | 
					
						
							|  |  |  |             try: | 
					
						
							|  |  |  |                 if len(esc) != 4: | 
					
						
							|  |  |  |                     raise ValueError | 
					
						
							|  |  |  |                 uni = int(esc, 16) | 
					
						
							|  |  |  |                 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: | 
					
						
							|  |  |  |                     msg = "Invalid \\uXXXX\\uXXXX surrogate pair" | 
					
						
							|  |  |  |                     if not s[end + 5:end + 7] == '\\u': | 
					
						
							|  |  |  |                         raise ValueError | 
					
						
							|  |  |  |                     esc2 = s[end + 7:end + 11] | 
					
						
							|  |  |  |                     if len(esc2) != 4: | 
					
						
							|  |  |  |                         raise ValueError | 
					
						
							|  |  |  |                     uni2 = int(esc2, 16) | 
					
						
							|  |  |  |                     uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) | 
					
						
							|  |  |  |                     next_end += 6 | 
					
						
							|  |  |  |                 m = unichr(uni) | 
					
						
							|  |  |  |             except ValueError: | 
					
						
							|  |  |  |                 raise ValueError(errmsg(msg, s, end)) | 
					
						
							|  |  |  |             end = next_end | 
					
						
							|  |  |  |         _append(m) | 
					
						
							|  |  |  |     return u''.join(chunks), end | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Use speedup | 
					
						
							|  |  |  | if c_scanstring is not None: | 
					
						
							|  |  |  |     scanstring = c_scanstring | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     scanstring = py_scanstring | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def JSONString(match, context): | 
					
						
							|  |  |  |     encoding = getattr(context, 'encoding', None) | 
					
						
							|  |  |  |     strict = getattr(context, 'strict', True) | 
					
						
							|  |  |  |     return scanstring(match.string, match.end(), encoding, strict) | 
					
						
							|  |  |  | pattern(r'"')(JSONString) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Matches a possibly empty run of whitespace.  Because it can match the
# empty string, ``WHITESPACE.match(s, i)`` always succeeds and ``.end()``
# gives the index of the next non-whitespace character.
WHITESPACE = re.compile(r'\s*', FLAGS)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def JSONObject(match, context, _w=WHITESPACE.match): | 
					
						
							|  |  |  |     pairs = {} | 
					
						
							|  |  |  |     s = match.string | 
					
						
							|  |  |  |     end = _w(s, match.end()).end() | 
					
						
							|  |  |  |     nextchar = s[end:end + 1] | 
					
						
							|  |  |  |     # Trivial empty object | 
					
						
							|  |  |  |     if nextchar == '}': | 
					
						
							|  |  |  |         return pairs, end + 1 | 
					
						
							|  |  |  |     if nextchar != '"': | 
					
						
							|  |  |  |         raise ValueError(errmsg("Expecting property name", s, end)) | 
					
						
							|  |  |  |     end += 1 | 
					
						
							|  |  |  |     encoding = getattr(context, 'encoding', None) | 
					
						
							|  |  |  |     strict = getattr(context, 'strict', True) | 
					
						
							|  |  |  |     iterscan = JSONScanner.iterscan | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         key, end = scanstring(s, end, encoding, strict) | 
					
						
							|  |  |  |         end = _w(s, end).end() | 
					
						
							|  |  |  |         if s[end:end + 1] != ':': | 
					
						
							|  |  |  |             raise ValueError(errmsg("Expecting : delimiter", s, end)) | 
					
						
							|  |  |  |         end = _w(s, end + 1).end() | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             value, end = iterscan(s, idx=end, context=context).next() | 
					
						
							|  |  |  |         except StopIteration: | 
					
						
							|  |  |  |             raise ValueError(errmsg("Expecting object", s, end)) | 
					
						
							|  |  |  |         pairs[key] = value | 
					
						
							|  |  |  |         end = _w(s, end).end() | 
					
						
							|  |  |  |         nextchar = s[end:end + 1] | 
					
						
							|  |  |  |         end += 1 | 
					
						
							|  |  |  |         if nextchar == '}': | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if nextchar != ',': | 
					
						
							|  |  |  |             raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) | 
					
						
							|  |  |  |         end = _w(s, end).end() | 
					
						
							|  |  |  |         nextchar = s[end:end + 1] | 
					
						
							|  |  |  |         end += 1 | 
					
						
							|  |  |  |         if nextchar != '"': | 
					
						
							|  |  |  |             raise ValueError(errmsg("Expecting property name", s, end - 1)) | 
					
						
							|  |  |  |     object_hook = getattr(context, 'object_hook', None) | 
					
						
							|  |  |  |     if object_hook is not None: | 
					
						
							|  |  |  |         pairs = object_hook(pairs) | 
					
						
							|  |  |  |     return pairs, end | 
					
						
							|  |  |  | pattern(r'{')(JSONObject) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def JSONArray(match, context, _w=WHITESPACE.match): | 
					
						
							|  |  |  |     values = [] | 
					
						
							|  |  |  |     s = match.string | 
					
						
							|  |  |  |     end = _w(s, match.end()).end() | 
					
						
							|  |  |  |     # Look-ahead for trivial empty array | 
					
						
							|  |  |  |     nextchar = s[end:end + 1] | 
					
						
							|  |  |  |     if nextchar == ']': | 
					
						
							|  |  |  |         return values, end + 1 | 
					
						
							|  |  |  |     iterscan = JSONScanner.iterscan | 
					
						
							|  |  |  |     while True: | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             value, end = iterscan(s, idx=end, context=context).next() | 
					
						
							|  |  |  |         except StopIteration: | 
					
						
							|  |  |  |             raise ValueError(errmsg("Expecting object", s, end)) | 
					
						
							|  |  |  |         values.append(value) | 
					
						
							|  |  |  |         end = _w(s, end).end() | 
					
						
							|  |  |  |         nextchar = s[end:end + 1] | 
					
						
							|  |  |  |         end += 1 | 
					
						
							|  |  |  |         if nextchar == ']': | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |         if nextchar != ',': | 
					
						
							|  |  |  |             raise ValueError(errmsg("Expecting , delimiter", s, end)) | 
					
						
							|  |  |  |         end = _w(s, end).end() | 
					
						
							|  |  |  |     return values, end | 
					
						
							|  |  |  | pattern(r'\[')(JSONArray) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# The handlers for every kind of JSON value, handed to Scanner below.
ANYTHING = [
    JSONObject,
    JSONArray,
    JSONString,
    JSONConstant,
    JSONNumber,
]

# Module-level scanner used by JSONObject and JSONArray to decode nested
# values.
JSONScanner = Scanner(ANYTHING)
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class JSONDecoder(object): | 
					
						
							|  |  |  |     """Simple JSON <http://json.org> decoder
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Performs the following translations in decoding by default: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | JSON          | Python            | | 
					
						
							|  |  |  |     +===============+===================+ | 
					
						
							|  |  |  |     | object        | dict              | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | array         | list              | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | string        | unicode           | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | number (int)  | int, long         | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | number (real) | float             | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | true          | True              | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | false         | False             | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  |     | null          | None              | | 
					
						
							|  |  |  |     +---------------+-------------------+ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as | 
					
						
							|  |  |  |     their corresponding ``float`` values, which is outside the JSON spec. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _scanner = Scanner(ANYTHING) | 
					
						
							|  |  |  |     __all__ = ['__init__', 'decode', 'raw_decode'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, encoding=None, object_hook=None, parse_float=None, | 
					
						
							|  |  |  |             parse_int=None, parse_constant=None, strict=True): | 
					
						
							|  |  |  |         """``encoding`` determines the encoding used to interpret any ``str``
 | 
					
						
							|  |  |  |         objects decoded by this instance (utf-8 by default).  It has no | 
					
						
							|  |  |  |         effect when decoding ``unicode`` objects. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Note that currently only encodings that are a superset of ASCII work, | 
					
						
							|  |  |  |         strings of other encodings should be passed in as ``unicode``. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ``object_hook``, if specified, will be called with the result of | 
					
						
							|  |  |  |         every JSON object decoded and its return value will be used in | 
					
						
							|  |  |  |         place of the given ``dict``.  This can be used to provide custom | 
					
						
							|  |  |  |         deserializations (e.g. to support JSON-RPC class hinting). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ``parse_float``, if specified, will be called with the string | 
					
						
							|  |  |  |         of every JSON float to be decoded. By default this is equivalent to | 
					
						
							|  |  |  |         float(num_str). This can be used to use another datatype or parser | 
					
						
							|  |  |  |         for JSON floats (e.g. decimal.Decimal). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ``parse_int``, if specified, will be called with the string | 
					
						
							|  |  |  |         of every JSON int to be decoded. By default this is equivalent to | 
					
						
							|  |  |  |         int(num_str). This can be used to use another datatype or parser | 
					
						
							|  |  |  |         for JSON integers (e.g. float). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ``parse_constant``, if specified, will be called with one of the | 
					
						
							|  |  |  |         following strings: -Infinity, Infinity, NaN, null, true, false. | 
					
						
							|  |  |  |         This can be used to raise an exception if invalid JSON numbers | 
					
						
							|  |  |  |         are encountered. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         self.encoding = encoding | 
					
						
							|  |  |  |         self.object_hook = object_hook | 
					
						
							|  |  |  |         self.parse_float = parse_float | 
					
						
							|  |  |  |         self.parse_int = parse_int | 
					
						
							|  |  |  |         self.parse_constant = parse_constant | 
					
						
							|  |  |  |         self.strict = strict | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def decode(self, s, _w=WHITESPACE.match): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Return the Python representation of ``s`` (a ``str`` or ``unicode`` | 
					
						
							|  |  |  |         instance containing a JSON document) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         obj, end = self.raw_decode(s, idx=_w(s, 0).end()) | 
					
						
							|  |  |  |         end = _w(s, end).end() | 
					
						
							|  |  |  |         if end != len(s): | 
					
						
							|  |  |  |             raise ValueError(errmsg("Extra data", s, end, len(s))) | 
					
						
							|  |  |  |         return obj | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def raw_decode(self, s, **kw): | 
					
						
							|  |  |  |         """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
 | 
					
						
							|  |  |  |         with a JSON document) and return a 2-tuple of the Python | 
					
						
							|  |  |  |         representation and the index in ``s`` where the document ended. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         This can be used to decode a JSON document from a string that may | 
					
						
							|  |  |  |         have extraneous data at the end. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         kw.setdefault('context', self) | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             obj, end = self._scanner.iterscan(s, **kw).next() | 
					
						
							|  |  |  |         except StopIteration: | 
					
						
							|  |  |  |             raise ValueError("No JSON object could be decoded") | 
					
						
							|  |  |  |         return obj, end |