mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
[3.9] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96502)
* Correctly pre-check for int-to-str conversion (#96537) Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DoS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check is as follows. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ Since $x < \lfloor x \rfloor + 1$ and $\lfloor y \rfloor \le y$ for all real $x$ and $y$, it follows that $$\frac{M}{3L} < \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org> Co-authored-by: Christian Heimes <christian@python.org> Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
This commit is contained in:
parent
d348afa15d
commit
cec1e9dfd7
27 changed files with 886 additions and 19 deletions
|
|
@ -3,7 +3,9 @@
|
|||
/* XXX The functional organization of this file is terrible */
|
||||
|
||||
#include "Python.h"
|
||||
#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits
|
||||
#include "pycore_interp.h" // _PY_NSMALLPOSINTS
|
||||
#include "pycore_long.h"
|
||||
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
|
||||
#include "longintrepr.h"
|
||||
|
||||
|
|
@ -36,6 +38,9 @@ PyObject *_PyLong_One = NULL;
|
|||
#define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS)
|
||||
#define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS)
|
||||
|
||||
#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
|
||||
#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion"
|
||||
|
||||
static PyObject *
|
||||
get_small_int(sdigit ival)
|
||||
{
|
||||
|
|
@ -1718,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa,
|
|||
size_a = Py_ABS(Py_SIZE(a));
|
||||
negative = Py_SIZE(a) < 0;
|
||||
|
||||
/* quick and dirty pre-check for overflowing the decimal digit limit,
|
||||
based on the inequality 10/3 >= log2(10)
|
||||
|
||||
explanation in https://github.com/python/cpython/pull/96537
|
||||
*/
|
||||
if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD
|
||||
/ (3 * PyLong_SHIFT) + 2) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
int max_str_digits = interp->int_max_str_digits;
|
||||
if ((max_str_digits > 0) &&
|
||||
(max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) {
|
||||
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
|
||||
max_str_digits);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* quick and dirty upper bound for the number of digits
|
||||
required to express a in base _PyLong_DECIMAL_BASE:
|
||||
|
||||
|
|
@ -1777,6 +1799,17 @@ long_to_decimal_string_internal(PyObject *aa,
|
|||
tenpow *= 10;
|
||||
strlen++;
|
||||
}
|
||||
if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
int max_str_digits = interp->int_max_str_digits;
|
||||
Py_ssize_t strlen_nosign = strlen - negative;
|
||||
if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
|
||||
Py_DECREF(scratch);
|
||||
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
|
||||
max_str_digits);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (writer) {
|
||||
if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) {
|
||||
Py_DECREF(scratch);
|
||||
|
|
@ -2290,6 +2323,7 @@ PyLong_FromString(const char *str, char **pend, int base)
|
|||
|
||||
start = str;
|
||||
if ((base & (base - 1)) == 0) {
|
||||
/* binary bases are not limited by int_max_str_digits */
|
||||
int res = long_from_binary_base(&str, base, &z);
|
||||
if (res < 0) {
|
||||
/* Syntax error. */
|
||||
|
|
@ -2441,6 +2475,17 @@ digit beyond the first.
|
|||
goto onError;
|
||||
}
|
||||
|
||||
/* Limit the size to avoid excessive computation attacks. */
|
||||
if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
int max_str_digits = interp->int_max_str_digits;
|
||||
if ((max_str_digits > 0) && (digits > max_str_digits)) {
|
||||
PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT,
|
||||
max_str_digits, digits);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Create an int object that can contain the largest possible
|
||||
* integer with this base and length. Note that there's no
|
||||
* need to initialize z->ob_digit -- no slot is read up before
|
||||
|
|
@ -5071,6 +5116,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
|
|||
}
|
||||
return PyLong_FromLong(0L);
|
||||
}
|
||||
/* default base and limit, forward to standard implementation */
|
||||
if (obase == NULL)
|
||||
return PyNumber_Long(x);
|
||||
|
||||
|
|
@ -5723,6 +5769,8 @@ internal representation of integers. The attributes are read only.");
|
|||
static PyStructSequence_Field int_info_fields[] = {
|
||||
{"bits_per_digit", "size of a digit in bits"},
|
||||
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
|
||||
{"default_max_str_digits", "maximum string conversion digits limitation"},
|
||||
{"str_digits_check_threshold", "minimum positive value for int_max_str_digits"},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
|
@ -5730,7 +5778,7 @@ static PyStructSequence_Desc int_info_desc = {
|
|||
"sys.int_info", /* name */
|
||||
int_info__doc__, /* doc */
|
||||
int_info_fields, /* fields */
|
||||
2 /* number of fields */
|
||||
4 /* number of fields */
|
||||
};
|
||||
|
||||
PyObject *
|
||||
|
|
@ -5745,6 +5793,17 @@ PyLong_GetInfo(void)
|
|||
PyLong_FromLong(PyLong_SHIFT));
|
||||
PyStructSequence_SET_ITEM(int_info, field++,
|
||||
PyLong_FromLong(sizeof(digit)));
|
||||
/*
|
||||
* The following two fields were added after investigating uses of
|
||||
* sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was
|
||||
* numba using sys.int_info.bits_per_digit as attribute access rather than
|
||||
* sequence unpacking. Cython and sympy also refer to sys.int_info but only
|
||||
* as info for debugging. No concern about adding these in a backport.
|
||||
*/
|
||||
PyStructSequence_SET_ITEM(int_info, field++,
|
||||
PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
|
||||
PyStructSequence_SET_ITEM(int_info, field++,
|
||||
PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
|
||||
if (PyErr_Occurred()) {
|
||||
Py_CLEAR(int_info);
|
||||
return NULL;
|
||||
|
|
@ -5790,6 +5849,10 @@ _PyLong_Init(PyThreadState *tstate)
|
|||
}
|
||||
}
|
||||
}
|
||||
tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits;
|
||||
if (tstate->interp->int_max_str_digits == -1) {
|
||||
tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue