mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
* Correctly pre-check for int-to-str conversion (#96537) Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ From this it follows that $$\frac{M}{3L} < \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org> Co-authored-by: Christian Heimes <christian@python.org> Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
108 lines
4.4 KiB
Python
108 lines
4.4 KiB
Python
import decimal
|
|
from io import StringIO
|
|
from collections import OrderedDict
|
|
from test.test_json import PyTest, CTest
|
|
from test import support
|
|
|
|
|
|
class TestDecode:
|
|
def test_decimal(self):
|
|
rval = self.loads('1.1', parse_float=decimal.Decimal)
|
|
self.assertTrue(isinstance(rval, decimal.Decimal))
|
|
self.assertEqual(rval, decimal.Decimal('1.1'))
|
|
|
|
def test_float(self):
|
|
rval = self.loads('1', parse_int=float)
|
|
self.assertTrue(isinstance(rval, float))
|
|
self.assertEqual(rval, 1.0)
|
|
|
|
def test_empty_objects(self):
|
|
self.assertEqual(self.loads('{}'), {})
|
|
self.assertEqual(self.loads('[]'), [])
|
|
self.assertEqual(self.loads('""'), "")
|
|
|
|
def test_object_pairs_hook(self):
|
|
s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
|
|
p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
|
|
("qrt", 5), ("pad", 6), ("hoy", 7)]
|
|
self.assertEqual(self.loads(s), eval(s))
|
|
self.assertEqual(self.loads(s, object_pairs_hook=lambda x: x), p)
|
|
self.assertEqual(self.json.load(StringIO(s),
|
|
object_pairs_hook=lambda x: x), p)
|
|
od = self.loads(s, object_pairs_hook=OrderedDict)
|
|
self.assertEqual(od, OrderedDict(p))
|
|
self.assertEqual(type(od), OrderedDict)
|
|
# the object_pairs_hook takes priority over the object_hook
|
|
self.assertEqual(self.loads(s, object_pairs_hook=OrderedDict,
|
|
object_hook=lambda x: None),
|
|
OrderedDict(p))
|
|
# check that empty object literals work (see #17368)
|
|
self.assertEqual(self.loads('{}', object_pairs_hook=OrderedDict),
|
|
OrderedDict())
|
|
self.assertEqual(self.loads('{"empty": {}}',
|
|
object_pairs_hook=OrderedDict),
|
|
OrderedDict([('empty', OrderedDict())]))
|
|
|
|
def test_decoder_optimizations(self):
|
|
# Several optimizations were made that skip over calls to
|
|
# the whitespace regex, so this test is designed to try and
|
|
# exercise the uncommon cases. The array cases are already covered.
|
|
rval = self.loads('{ "key" : "value" , "k":"v" }')
|
|
self.assertEqual(rval, {"key":"value", "k":"v"})
|
|
|
|
def check_keys_reuse(self, source, loads):
|
|
rval = loads(source)
|
|
(a, b), (c, d) = sorted(rval[0]), sorted(rval[1])
|
|
self.assertIs(a, c)
|
|
self.assertIs(b, d)
|
|
|
|
def test_keys_reuse(self):
|
|
s = '[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]'
|
|
self.check_keys_reuse(s, self.loads)
|
|
decoder = self.json.decoder.JSONDecoder()
|
|
self.check_keys_reuse(s, decoder.decode)
|
|
self.assertFalse(decoder.memo)
|
|
|
|
def test_extra_data(self):
|
|
s = '[1, 2, 3]5'
|
|
msg = 'Extra data'
|
|
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
|
|
|
|
def test_invalid_escape(self):
|
|
s = '["abc\\y"]'
|
|
msg = 'escape'
|
|
self.assertRaisesRegex(self.JSONDecodeError, msg, self.loads, s)
|
|
|
|
def test_invalid_input_type(self):
|
|
msg = 'the JSON object must be str'
|
|
for value in [1, 3.14, [], {}, None]:
|
|
self.assertRaisesRegex(TypeError, msg, self.loads, value)
|
|
|
|
def test_string_with_utf8_bom(self):
|
|
# see #18958
|
|
bom_json = "[1,2,3]".encode('utf-8-sig').decode('utf-8')
|
|
with self.assertRaises(self.JSONDecodeError) as cm:
|
|
self.loads(bom_json)
|
|
self.assertIn('BOM', str(cm.exception))
|
|
with self.assertRaises(self.JSONDecodeError) as cm:
|
|
self.json.load(StringIO(bom_json))
|
|
self.assertIn('BOM', str(cm.exception))
|
|
# make sure that the BOM is not detected in the middle of a string
|
|
bom_in_str = '"{}"'.format(''.encode('utf-8-sig').decode('utf-8'))
|
|
self.assertEqual(self.loads(bom_in_str), '\ufeff')
|
|
self.assertEqual(self.json.load(StringIO(bom_in_str)), '\ufeff')
|
|
|
|
def test_negative_index(self):
|
|
d = self.json.JSONDecoder()
|
|
self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000)
|
|
|
|
def test_limit_int(self):
|
|
maxdigits = 5000
|
|
with support.adjust_int_max_str_digits(maxdigits):
|
|
self.loads('1' * maxdigits)
|
|
with self.assertRaises(ValueError):
|
|
self.loads('1' * (maxdigits + 1))
|
|
|
|
|
|
class TestPyDecode(TestDecode, PyTest):
    """Run the shared TestDecode cases with the PyTest base mixed in."""
|
|
class TestCDecode(TestDecode, CTest):
    """Run the shared TestDecode cases with the CTest base mixed in."""
|