gh-59598: Ignore leading whitespace in JSONDecoder.raw_decode

Whitespace is allowed before JSON objects according to RFC 4627.
This commit is contained in:
Ankit Goel 2024-03-31 00:27:09 +00:00
parent 91b7f2e7f6
commit 96963f1e97
4 changed files with 25 additions and 3 deletions

View file

@@ -341,23 +341,27 @@ def decode(self, s, _w=WHITESPACE.match):
containing a JSON document).
"""
-obj, end = self.raw_decode(s, idx=_w(s, 0).end())
+obj, end = self.raw_decode(s)
end = _w(s, end).end()
if end != len(s):
raise JSONDecodeError("Extra data", s, end)
return obj
-def raw_decode(self, s, idx=0):
+def raw_decode(self, s, idx=0, _w=WHITESPACE.match):
"""Decode a JSON document from ``s`` (a ``str`` beginning with
a JSON document) and return a 2-tuple of the Python
representation and the index in ``s`` where the document ended.
Whitespace at the beginning of the document will be ignored.
Optionally, ``idx`` can be used to specify an offset in ``s``
where the document begins.
This can be used to decode a JSON document from a string that may
have extraneous data at the end.
"""
try:
-obj, end = self.scan_once(s, idx)
+obj, end = self.scan_once(s, idx=_w(s, idx).end())
except StopIteration as err:
raise JSONDecodeError("Expecting value", s, err.value) from None
return obj, end