cpython/Lib/test/test_json/test_decode.py
Gregory P. Smith cec1e9dfd7
[3.9] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96502)
* Correctly pre-check for int-to-str conversion (#96537)

Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =)

The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact.

The justification for the current check. The C code check is:
```c
max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10
```

In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is:
$$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$

From this it follows that
$$\frac{M}{3L} < \frac{s-1}{10}$$
hence that
$$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$
So
$$2^{L(s-1)} > 10^M.$$
But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check.

<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->

Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
Co-authored-by: Christian Heimes <christian@python.org>
Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
2022-09-05 11:21:03 +02:00

108 lines
4.4 KiB
Python

import decimal
from io import StringIO
from collections import OrderedDict
from test.test_json import PyTest, CTest
from test import support
class TestDecode:
def test_decimal(self):
rval = self.loads('1.1', parse_float=decimal.Decimal)
self.assertTrue(isinstance(rval, decimal.Decimal))
self.assertEqual(rval, decimal.Decimal('1.1'))
def test_float(self):
rval = self.loads('1', parse_int=float)
self.assertTrue(isinstance(rval, float))
self.assertEqual(rval, 1.0)
def test_empty_objects(self):
self.assertEqual(self.loads('{}'), {})
self.assertEqual(self.loads('[]'), [])
self.assertEqual(self.loads('""'), "")
def test_object_pairs_hook(self):
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
("qrt", 5), ("pad", 6), ("hoy", 7)]
self.assertEqual(self.loads(s), eval(s))
self.assertEqual(self.loads(s, object_pairs_hook=lambda x: x), p)
self.assertEqual(self.json.load(StringIO(s),
object_pairs_hook=lambda x: x), p)
od = self.loads(s, object_pairs_hook=OrderedDict)
self.assertEqual(od, OrderedDict(p))
self.assertEqual(type(od), OrderedDict)
# the object_pairs_hook takes priority over the object_hook
self.assertEqual(self.loads(s, object_pairs_hook=OrderedDict,
object_hook=lambda x: None),
OrderedDict(p))
# check that empty object literals work (see #17368)
self.assertEqual(self.loads('{}', object_pairs_hook=OrderedDict),
OrderedDict())
self.assertEqual(self.loads('{"empty": {}}',
object_pairs_hook=OrderedDict),
OrderedDict([('empty', OrderedDict())]))
def test_decoder_optimizations(self):
# Several optimizations were made that skip over calls to
# the whitespace regex, so this test is designed to try and
# exercise the uncommon cases. The array cases are already covered.
rval = self.loads('{ "key" : "value" , "k":"v" }')
self.assertEqual(rval, {"key":"value", "k":"v"})
def check_keys_reuse(self, source, loads):
rval = loads(source)
(a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
self.assertIs(a, c)
self.assertIs(b, d)
def test_keys_reuse(self):
s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
self.check_keys_reuse(s, self.loads)
decoder = self.json.decoder.JSONDecoder()
self.check_keys_reuse(s, decoder.decode)
self.assertFalse(decoder.memo)
def test_extra_data(self):
s = '[1, 2, 3]5'
msg = 'Extra data'
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
def test_invalid_escape(self):
s = '["abc\\y"]'
msg = 'escape'
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
def test_invalid_input_type(self):
msg = 'the JSON object must be str'
for value in [1, 3.14, [], {}, None]:
self.assertRaisesRegex(TypeError, msg, self.loads, value)
def test_string_with_utf8_bom(self):
# see #18958
bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
with self.assertRaises(self.JSONDecodeError) as cm:
self.loads(bom_json)
self.assertIn('BOM', str(cm.exception))
with self.assertRaises(self.JSONDecodeError) as cm:
self.json.load(StringIO(bom_json))
self.assertIn('BOM', str(cm.exception))
# make sure that the BOM is not detected in the middle of a string
bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8'))
self.assertEqual(self.loads(bom_in_str), '\ufeff')
self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff')
def test_negative_index(self):
d = self.json.JSONDecoder()
self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000)
def test_limit_int(self):
maxdigits = 5000
with support.adjust_int_max_str_digits(maxdigits):
self.loads('1' * maxdigits)
with self.assertRaises(ValueError):
self.loads('1' * (maxdigits + 1))
class TestPyDecode(TestDecode, PyTest): pass
class TestCDecode(TestDecode, CTest): pass