[3.9] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96502)

* Correctly pre-check for int-to-str conversion (#96537)

Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =)

The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact.

The justification for the current check. The C code check is:
```c
max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10
```

In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is:
$$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$

From this it follows that
$$\frac{M}{3L} < \frac{s-1}{10}$$
hence that
$$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$
So
$$2^{L(s-1)} > 10^M.$$
But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check.

<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->

Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
Co-authored-by: Christian Heimes <christian@python.org>
Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
This commit is contained in:
Gregory P. Smith 2022-09-05 02:21:03 -07:00 committed by GitHub
parent d348afa15d
commit cec1e9dfd7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 886 additions and 19 deletions

View file

@ -3,7 +3,9 @@
/* XXX The functional organization of this file is terrible */
#include "Python.h"
#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits
#include "pycore_interp.h" // _PY_NSMALLPOSINTS
#include "pycore_long.h"
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
#include "longintrepr.h"
@ -36,6 +38,9 @@ PyObject *_PyLong_One = NULL;
#define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS)
#define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS)
#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion"
static PyObject *
get_small_int(sdigit ival)
{
@ -1718,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa,
size_a = Py_ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;
/* quick and dirty pre-check for overflowing the decimal digit limit,
based on the inequality 10/3 >= log2(10)
explanation in https://github.com/python/cpython/pull/96537
*/
if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD
/ (3 * PyLong_SHIFT) + 2) {
PyInterpreterState *interp = _PyInterpreterState_GET();
int max_str_digits = interp->int_max_str_digits;
if ((max_str_digits > 0) &&
(max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) {
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
max_str_digits);
return -1;
}
}
/* quick and dirty upper bound for the number of digits
required to express a in base _PyLong_DECIMAL_BASE:
@ -1777,6 +1799,17 @@ long_to_decimal_string_internal(PyObject *aa,
tenpow *= 10;
strlen++;
}
if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
PyInterpreterState *interp = _PyInterpreterState_GET();
int max_str_digits = interp->int_max_str_digits;
Py_ssize_t strlen_nosign = strlen - negative;
if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
Py_DECREF(scratch);
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
max_str_digits);
return -1;
}
}
if (writer) {
if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) {
Py_DECREF(scratch);
@ -2290,6 +2323,7 @@ PyLong_FromString(const char *str, char **pend, int base)
start = str;
if ((base & (base - 1)) == 0) {
/* binary bases are not limited by int_max_str_digits */
int res = long_from_binary_base(&str, base, &z);
if (res < 0) {
/* Syntax error. */
@ -2441,6 +2475,17 @@ digit beyond the first.
goto onError;
}
/* Limit the size to avoid excessive computation attacks. */
if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
PyInterpreterState *interp = _PyInterpreterState_GET();
int max_str_digits = interp->int_max_str_digits;
if ((max_str_digits > 0) && (digits > max_str_digits)) {
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT,
max_str_digits, digits);
return NULL;
}
}
/* Create an int object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
@ -5071,6 +5116,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
}
return PyLong_FromLong(0L);
}
/* default base and limit, forward to standard implementation */
if (obase == NULL)
return PyNumber_Long(x);
@ -5723,6 +5769,8 @@ internal representation of integers. The attributes are read only.");
static PyStructSequence_Field int_info_fields[] = {
{"bits_per_digit", "size of a digit in bits"},
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
{"default_max_str_digits", "maximum string conversion digits limitation"},
{"str_digits_check_threshold", "minimum positive value for int_max_str_digits"},
{NULL, NULL}
};
@ -5730,7 +5778,7 @@ static PyStructSequence_Desc int_info_desc = {
"sys.int_info", /* name */
int_info__doc__, /* doc */
int_info_fields, /* fields */
2 /* number of fields */
4 /* number of fields */
};
PyObject *
@ -5745,6 +5793,17 @@ PyLong_GetInfo(void)
PyLong_FromLong(PyLong_SHIFT));
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(sizeof(digit)));
/*
* The following two fields were added after investigating uses of
* sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was
* numba using sys.int_info.bits_per_digit as attribute access rather than
* sequence unpacking. Cython and sympy also refer to sys.int_info but only
* as info for debugging. No concern about adding these in a backport.
*/
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
if (PyErr_Occurred()) {
Py_CLEAR(int_info);
return NULL;
@ -5790,6 +5849,10 @@ _PyLong_Init(PyThreadState *tstate)
}
}
}
tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits;
if (tstate->interp->int_max_str_digits == -1) {
tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
}
return 1;
}