mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
[3.14] GH-139653: Only raise an exception (or fatal error) when the stack pointer is about to overflow the stack. (GH-141711) (GH-141944)
Only raises if the stack pointer is both below the limit *and* above the stack base.
This prevents false positives for user-space threads, as the stack pointer will be outside those bounds
if the stack has been swapped.
Cherry-picked from commit c25a070759
Co-authored-by: Mark Shannon <mark@hotpy.org>
This commit is contained in:
parent
20a677d75a
commit
69021e9acf
4 changed files with 98 additions and 9 deletions
|
|
@ -201,10 +201,13 @@ extern void _PyEval_DeactivateOpCache(void);
|
|||
static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
|
||||
uintptr_t here_addr = _Py_get_machine_stack_pointer();
|
||||
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
|
||||
// Overflow if stack pointer is between soft limit and the base of the hardware stack.
|
||||
// If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
|
||||
// We could have the wrong stack limits because of limited platform support, or user-space threads.
|
||||
#if _Py_STACK_GROWS_DOWN
|
||||
return here_addr < _tstate->c_stack_soft_limit;
|
||||
return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
|
||||
#else
|
||||
return here_addr > _tstate->c_stack_soft_limit;
|
||||
return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
68
InternalDocs/stack_protection.md
Normal file
68
InternalDocs/stack_protection.md
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# Stack Protection
|
||||
|
||||
CPython protects against stack overflow in the form of runaway, or just very deep, recursion by raising a `RecursionError` instead of just crashing.
|
||||
Protection against runaway pure-Python recursion has existed since very early versions, but in 3.12 we added protection against stack overflow
|
||||
in C code. This was initially implemented using a counter and later improved in 3.14 to use the actual stack depth.
|
||||
For those platforms that support it (Windows, Mac, and most Linuxes) we query the operating system to find the stack bounds.
|
||||
For other platforms we use conservative estimates.
|
||||
|
||||
|
||||
The C stack looks like this:
|
||||
|
||||
```
|
||||
+-------+ <--- Top of machine stack
|
||||
| |
|
||||
| |
|
||||
|
||||
~~
|
||||
|
||||
| |
|
||||
| |
|
||||
+-------+ <--- Soft limit
|
||||
| |
|
||||
| | _PyOS_STACK_MARGIN_BYTES
|
||||
| |
|
||||
+-------+ <--- Hard limit
|
||||
| |
|
||||
| | _PyOS_STACK_MARGIN_BYTES
|
||||
| |
|
||||
+-------+ <--- Bottom of machine stack
|
||||
```
|
||||
|
||||
|
||||
We get the current stack pointer using compiler intrinsics where available, or by taking the address of a C local variable. See `_Py_get_machine_stack_pointer()`.
|
||||
|
||||
The soft and hard limit pointers are set by calling `_Py_InitializeRecursionLimits()` during thread initialization.
|
||||
|
||||
Recursion checks are performed by `_Py_EnterRecursiveCall()` or `_Py_EnterRecursiveCallTstate()`, which compare the stack pointer to the soft limit. If the stack pointer is lower than the soft limit, then `_Py_CheckRecursiveCall()` is called, which checks against both the hard and soft limits:
|
||||
|
||||
```python
|
||||
kb_used = (stack_top - stack_pointer)>>10
|
||||
if stack_pointer < bottom_of_machine_stack:
|
||||
pass # Our stack limits could be wrong so it is safest to do nothing.
|
||||
elif stack_pointer < hard_limit:
|
||||
FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)")
|
||||
elif stack_pointer < soft_limit:
|
||||
raise RecursionError(f"Stack overflow (used {kb_used} kB)")
|
||||
```
|
||||
|
||||
### User space threads and other oddities
|
||||
|
||||
Some libraries provide user-space threads. These will change the C stack at runtime.
|
||||
To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit.
|
||||
|
||||
### Diagnosing and fixing stack overflows
|
||||
|
||||
For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`.
|
||||
|
||||
If you see a traceback ending in: `RecursionError: Stack overflow (used ... kB)` then the stack protection is working as intended. If you don't expect to see the error, then check the amount of stack used. If it seems low then CPython may not be configured properly.
|
||||
|
||||
However, if you see a fatal error or crash, then something is not right.
|
||||
Either a recursive call is not checking `_Py_EnterRecursiveCall()`, or the amount of C stack consumed by a single call exceeds `_PyOS_STACK_MARGIN_BYTES`. If a hard crash occurs, it probably means that the amount of C stack consumed is more than double `_PyOS_STACK_MARGIN_BYTES`.
|
||||
|
||||
Likely causes:
|
||||
* Recursive code is not calling `_Py_EnterRecursiveCall()`
|
||||
* `-O0` compilation flags, especially for Clang. With no optimization, C calls can consume a lot of stack space.
|
||||
* Giant, complex functions in third-party C extensions. This is unlikely as the function in question would need to be more complicated than the bytecode interpreter.
|
||||
* `_PyOS_STACK_MARGIN_BYTES` is just too low.
|
||||
* `_Py_InitializeRecursionLimits()` is not setting the soft and hard limits correctly for that platform.
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
Only raise a ``RecursionError`` or trigger a fatal error if the stack
|
||||
pointer is both below the limit pointer *and* above the stack base. If
|
||||
outside of these bounds assume that it is OK. This prevents false positives
|
||||
when user-space threads swap stacks.
|
||||
|
|
@ -344,9 +344,11 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
|
|||
_Py_InitializeRecursionLimits(tstate);
|
||||
}
|
||||
#if _Py_STACK_GROWS_DOWN
|
||||
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES;
|
||||
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES &&
|
||||
here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
|
||||
#else
|
||||
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES;
|
||||
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES &&
|
||||
here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -437,7 +439,7 @@ int pthread_attr_destroy(pthread_attr_t *a)
|
|||
#endif
|
||||
|
||||
static void
|
||||
hardware_stack_limits(uintptr_t *base, uintptr_t *top)
|
||||
hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp)
|
||||
{
|
||||
#ifdef WIN32
|
||||
ULONG_PTR low, high;
|
||||
|
|
@ -473,10 +475,19 @@ hardware_stack_limits(uintptr_t *base, uintptr_t *top)
|
|||
return;
|
||||
}
|
||||
# endif
|
||||
uintptr_t here_addr = _Py_get_machine_stack_pointer();
|
||||
uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096);
|
||||
// Add some space for caller function then round to minimum page size
|
||||
// This is a guess at the top of the stack, but should be a reasonably
|
||||
// good guess if called from _PyThreadState_Attach when creating a thread.
|
||||
// If the thread is attached deep in a call stack, then the guess will be poor.
|
||||
#if _Py_STACK_GROWS_DOWN
|
||||
uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE);
|
||||
*top = top_addr;
|
||||
*base = top_addr - Py_C_STACK_SIZE;
|
||||
# else
|
||||
uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE);
|
||||
*base = base_addr;
|
||||
*top = base_addr + Py_C_STACK_SIZE;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -525,7 +536,8 @@ void
|
|||
_Py_InitializeRecursionLimits(PyThreadState *tstate)
|
||||
{
|
||||
uintptr_t base, top;
|
||||
hardware_stack_limits(&base, &top);
|
||||
uintptr_t here_addr = _Py_get_machine_stack_pointer();
|
||||
hardware_stack_limits(&base, &top, here_addr);
|
||||
assert(top != 0);
|
||||
|
||||
tstate_set_stack(tstate, base, top);
|
||||
|
|
@ -569,7 +581,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)
|
|||
|
||||
|
||||
/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
|
||||
if the recursion_depth reaches recursion_limit. */
|
||||
if the stack pointer is between the stack base and c_stack_soft_limit. */
|
||||
int
|
||||
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
|
||||
{
|
||||
|
|
@ -578,10 +590,12 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
|
|||
assert(_tstate->c_stack_soft_limit != 0);
|
||||
assert(_tstate->c_stack_hard_limit != 0);
|
||||
#if _Py_STACK_GROWS_DOWN
|
||||
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
|
||||
if (here_addr < _tstate->c_stack_hard_limit) {
|
||||
/* Overflowing while handling an overflow. Give up. */
|
||||
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
|
||||
#else
|
||||
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
|
||||
if (here_addr > _tstate->c_stack_hard_limit) {
|
||||
/* Overflowing while handling an overflow. Give up. */
|
||||
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue