GH-139653: Only raise an exception (or fatal error) when the stack pointer is about to overflow the stack. (GH-141711)

Only raises if the stack pointer is both below the limit *and* above the stack base.
This prevents false positives for user-space threads, as the stack pointer will be outside those bounds
if the stack has been swapped.
This commit is contained in:
Mark Shannon 2025-11-19 10:16:24 +00:00 committed by GitHub
parent 5d1f8f2d03
commit c25a070759
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 38 additions and 10 deletions

View file

@ -217,10 +217,13 @@ extern void _PyEval_DeactivateOpCache(void);
static inline int _Py_MakeRecCheck(PyThreadState *tstate) { static inline int _Py_MakeRecCheck(PyThreadState *tstate) {
uintptr_t here_addr = _Py_get_machine_stack_pointer(); uintptr_t here_addr = _Py_get_machine_stack_pointer();
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
// Overflow if stack pointer is between soft limit and the base of the hardware stack.
// If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing.
// We could have the wrong stack limits because of limited platform support, or user-space threads.
#if _Py_STACK_GROWS_DOWN #if _Py_STACK_GROWS_DOWN
return here_addr < _tstate->c_stack_soft_limit; return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else #else
return here_addr > _tstate->c_stack_soft_limit; return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif #endif
} }

View file

@ -38,12 +38,19 @@ # Stack Protection
```python ```python
kb_used = (stack_top - stack_pointer)>>10 kb_used = (stack_top - stack_pointer)>>10
if stack_pointer < hard_limit: if stack_pointer < bottom_of_machine_stack:
pass # Our stack limits could be wrong so it is safest to do nothing.
elif stack_pointer < hard_limit:
FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)") FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)")
elif stack_pointer < soft_limit: elif stack_pointer < soft_limit:
raise RecursionError(f"Stack overflow (used {kb_used} kB)") raise RecursionError(f"Stack overflow (used {kb_used} kB)")
``` ```
### User space threads and other oddities
Some libraries provide user-space threads. These will change the C stack at runtime.
To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit.
### Diagnosing and fixing stack overflows ### Diagnosing and fixing stack overflows
For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`. For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`.

View file

@ -0,0 +1,4 @@
Only raise a ``RecursionError`` or trigger a fatal error if the stack
pointer is both below the limit pointer *and* above the stack base. If
outside of these bounds assume that it is OK. This prevents false positives
when user-space threads swap stacks.

View file

@ -362,9 +362,11 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count)
_Py_InitializeRecursionLimits(tstate); _Py_InitializeRecursionLimits(tstate);
} }
#if _Py_STACK_GROWS_DOWN #if _Py_STACK_GROWS_DOWN
return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES; return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES;
#else #else
return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES; return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES &&
here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES;
#endif #endif
} }
@ -455,7 +457,7 @@ int pthread_attr_destroy(pthread_attr_t *a)
#endif #endif
static void static void
hardware_stack_limits(uintptr_t *base, uintptr_t *top) hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp)
{ {
#ifdef WIN32 #ifdef WIN32
ULONG_PTR low, high; ULONG_PTR low, high;
@ -491,10 +493,19 @@ hardware_stack_limits(uintptr_t *base, uintptr_t *top)
return; return;
} }
# endif # endif
uintptr_t here_addr = _Py_get_machine_stack_pointer(); // Add some space for caller function then round to minimum page size
uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096); // This is a guess at the top of the stack, but should be a reasonably
// good guess if called from _PyThreadState_Attach when creating a thread.
// If the thread is attached deep in a call stack, then the guess will be poor.
#if _Py_STACK_GROWS_DOWN
uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*top = top_addr; *top = top_addr;
*base = top_addr - Py_C_STACK_SIZE; *base = top_addr - Py_C_STACK_SIZE;
# else
uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE);
*base = base_addr;
*top = base_addr + Py_C_STACK_SIZE;
#endif
#endif #endif
} }
@ -543,7 +554,8 @@ void
_Py_InitializeRecursionLimits(PyThreadState *tstate) _Py_InitializeRecursionLimits(PyThreadState *tstate)
{ {
uintptr_t base, top; uintptr_t base, top;
hardware_stack_limits(&base, &top); uintptr_t here_addr = _Py_get_machine_stack_pointer();
hardware_stack_limits(&base, &top, here_addr);
assert(top != 0); assert(top != 0);
tstate_set_stack(tstate, base, top); tstate_set_stack(tstate, base, top);
@ -587,7 +599,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate)
/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the recursion_depth reaches recursion_limit. */ if the stack pointer is between the stack base and c_stack_hard_limit. */
int int
_Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
{ {
@ -596,10 +608,12 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where)
assert(_tstate->c_stack_soft_limit != 0); assert(_tstate->c_stack_soft_limit != 0);
assert(_tstate->c_stack_hard_limit != 0); assert(_tstate->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN #if _Py_STACK_GROWS_DOWN
assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES);
if (here_addr < _tstate->c_stack_hard_limit) { if (here_addr < _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */ /* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024; int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024;
#else #else
assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES);
if (here_addr > _tstate->c_stack_hard_limit) { if (here_addr > _tstate->c_stack_hard_limit) {
/* Overflowing while handling an overflow. Give up. */ /* Overflowing while handling an overflow. Give up. */
int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024; int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024;