GH-139291: Fix C stack limits by factoring out finding hardware stack limits (GH-139294)

This commit is contained in:
Mark Shannon 2025-10-07 14:04:37 +01:00 committed by GitHub
parent 0e2cdd313b
commit 7094f09f54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -438,31 +438,26 @@ int pthread_attr_destroy(pthread_attr_t *a)
#endif
void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
static void
hardware_stack_limits(uintptr_t *top, uintptr_t *base)
{
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
#ifdef WIN32
ULONG_PTR low, high;
GetCurrentThreadStackLimits(&low, &high);
_tstate->c_stack_top = (uintptr_t)high;
*top = (uintptr_t)high;
ULONG guarantee = 0;
SetThreadStackGuarantee(&guarantee);
_tstate->c_stack_hard_limit = ((uintptr_t)low) + guarantee + _PyOS_STACK_MARGIN_BYTES;
_tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
*base = (uintptr_t)low + guarantee;
#elif defined(__APPLE__)
pthread_t this_thread = pthread_self();
void *stack_addr = pthread_get_stackaddr_np(this_thread); // top of the stack
size_t stack_size = pthread_get_stacksize_np(this_thread);
_tstate->c_stack_top = (uintptr_t)stack_addr;
_tstate->c_stack_hard_limit = _tstate->c_stack_top - stack_size;
_tstate->c_stack_soft_limit = _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES;
*top = (uintptr_t)stack_addr;
*base = ((uintptr_t)stack_addr) - stack_size;
#else
uintptr_t here_addr = _Py_get_machine_stack_pointer();
/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size
/// (on alpine at least) is much smaller than expected and imposes undue limits
/// compared to the old stack size estimation. (We assume musl is not glibc.)
/// XXX musl supports HAVE_PTHRED_GETATTR_NP, but the resulting stack size
/// (on alpine at least) is much smaller than expected and imposes undue limits
/// compared to the old stack size estimation. (We assume musl is not glibc.)
# if defined(HAVE_PTHREAD_GETATTR_NP) && !defined(_AIX) && \
!defined(__NetBSD__) && (defined(__GLIBC__) || !defined(__linux__))
size_t stack_size, guard_size;
@ -475,26 +470,35 @@ _Py_InitializeRecursionLimits(PyThreadState *tstate)
err |= pthread_attr_destroy(&attr);
}
if (err == 0) {
uintptr_t base = ((uintptr_t)stack_addr) + guard_size;
_tstate->c_stack_top = base + stack_size;
#ifdef _Py_THREAD_SANITIZER
// Thread sanitizer crashes if we use a bit more than half the stack.
_tstate->c_stack_soft_limit = base + (stack_size / 2);
#else
_tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
#endif
_tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
assert(_tstate->c_stack_soft_limit < here_addr);
assert(here_addr < _tstate->c_stack_top);
*base = ((uintptr_t)stack_addr) + guard_size;
*top = (uintptr_t)stack_addr + stack_size;
return;
}
# endif
_tstate->c_stack_top = _Py_SIZE_ROUND_UP(here_addr, 4096);
_tstate->c_stack_soft_limit = _tstate->c_stack_top - Py_C_STACK_SIZE;
_tstate->c_stack_hard_limit = _tstate->c_stack_top - (Py_C_STACK_SIZE + _PyOS_STACK_MARGIN_BYTES);
uintptr_t here_addr = _Py_get_machine_stack_pointer();
uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096);
*top = top_addr;
*base = top_addr - Py_C_STACK_SIZE;
#endif
}
void
_Py_InitializeRecursionLimits(PyThreadState *tstate)
{
uintptr_t top;
uintptr_t base;
hardware_stack_limits(&top, &base);
#ifdef _Py_THREAD_SANITIZER
// Thread sanitizer crashes if we use more than half the stack.
uintptr_t stacksize = top - base;
base += stacksize/2;
#endif
_PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate;
_tstate->c_stack_top = top;
_tstate->c_stack_hard_limit = base + _PyOS_STACK_MARGIN_BYTES;
_tstate->c_stack_soft_limit = base + _PyOS_STACK_MARGIN_BYTES * 2;
}
/* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall()
if the recursion_depth reaches recursion_limit. */
int