[3.14] gh-145615: Fix mimalloc page leak in the free-threaded build (gh-145626) (#145691)

Fix three issues that caused mimalloc pages to be leaked until the
owning thread exited:

1. In _PyMem_mi_page_maybe_free(), move pages out of the full queue
   when relying on QSBR to defer freeing the page. Pages in the full
   queue are never searched by mi_page_queue_find_free_ex(), so a page
   left there is unusable for allocations.

2. Move _PyMem_mi_page_clear_qsbr() from _mi_page_free_collect() to
   _mi_page_thread_free_collect() where it only fires when all blocks
   on the page are free (used == 0). The previous placement was too
   broad: it cleared QSBR state whenever local_free was non-NULL, but
   _mi_page_free_collect() is called from non-allocation paths (e.g.,
   page visiting in mi_heap_visit_blocks) where the page is not being
   reused.

3. In _PyMem_mi_page_maybe_free(), use the page's heap tld to find the
   correct thread state for QSBR list insertion instead of
   PyThreadState_GET(). During stop-the-world pauses, the function may
   process pages belonging to other threads, so the current thread
   state is not necessarily the owner of the page.

(cherry picked from commit d76df75f51)

Co-authored-by: Sam Gross <colesbury@gmail.com>
Authored by Miss Islington (bot) on 2026-03-09 19:17:57 +01:00; committed by GitHub.
parent 7c624d4f31
commit ba1ea3a85a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 40 additions and 10 deletions

View file

@ -0,0 +1,2 @@
Fixed a memory leak in the :term:`free-threaded build` where mimalloc pages
could remain unreclaimable until the owning thread exited.

View file

@ -100,7 +100,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
// note: this will free retired pages as well.
bool freed = _PyMem_mi_page_maybe_free(page, pq, collect >= MI_FORCE);
if (!freed && collect == MI_ABANDON) {
_mi_page_abandon(page, pq);
// _PyMem_mi_page_maybe_free may have moved the page to a different
// page queue, so we need to re-fetch the correct queue.
uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size));
_mi_page_abandon(page, &heap->pages[bin]);
}
}
else if (collect == MI_ABANDON) {

View file

@ -213,6 +213,13 @@ static void _mi_page_thread_free_collect(mi_page_t* page)
// update counts now
page->used -= count;
if (page->used == 0) {
// The page may have had a QSBR goal set from a previous point when it
// was all-free. That goal is no longer valid because the page was
// allocated from and then freed again by other threads.
_PyMem_mi_page_clear_qsbr(page);
}
}
void _mi_page_free_collect(mi_page_t* page, bool force) {
@ -225,9 +232,6 @@ void _mi_page_free_collect(mi_page_t* page, bool force) {
// and the local free list
if (page->local_free != NULL) {
// any previous QSBR goals are no longer valid because we reused the page
_PyMem_mi_page_clear_qsbr(page);
if mi_likely(page->free == NULL) {
// usual case
page->free = page->local_free;

View file

@ -1286,6 +1286,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t slices_needed, s
_mi_stat_decrease(&tld->stats->pages_abandoned, 1);
#ifdef Py_GIL_DISABLED
page->qsbr_goal = 0;
mi_assert_internal(page->qsbr_node.next == NULL);
#endif
segment->abandoned--;
slice = mi_segment_page_clear(page, tld); // re-assign slice due to coalesce!
@ -1361,6 +1362,7 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap,
// if everything free by now, free the page
#ifdef Py_GIL_DISABLED
page->qsbr_goal = 0;
mi_assert_internal(page->qsbr_node.next == NULL);
#endif
slice = mi_segment_page_clear(page, tld); // set slice again due to coalesceing
}

View file

@ -149,6 +149,12 @@ should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page)
}
return false;
}
static _PyThreadStateImpl *
tstate_from_heap(mi_heap_t *heap)
{
return _Py_CONTAINER_OF(heap->tld, _PyThreadStateImpl, mimalloc.tld);
}
#endif
static bool
@ -157,23 +163,35 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
#ifdef Py_GIL_DISABLED
assert(mi_page_all_free(page));
if (page->use_qsbr) {
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(tstate->qsbr, page->qsbr_goal)) {
struct _qsbr_thread_state *qsbr = ((_PyThreadStateImpl *)PyThreadState_GET())->qsbr;
if (page->qsbr_goal != 0 && _Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
_PyMem_mi_page_clear_qsbr(page);
_mi_page_free(page, pq, force);
return true;
}
// gh-145615: since we are not freeing this page yet, we want to
// make it available for allocations. Note that the QSBR goal and
// linked list node remain set even if the page is later used for
// an allocation. We only detect and clear the QSBR goal when the
// page becomes empty again (used == 0).
if (mi_page_is_in_full(page)) {
_mi_page_unfull(page);
}
_PyMem_mi_page_clear_qsbr(page);
page->retire_expire = 0;
if (should_advance_qsbr_for_page(tstate->qsbr, page)) {
page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared);
if (should_advance_qsbr_for_page(qsbr, page)) {
page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
}
else {
page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared);
page->qsbr_goal = _Py_qsbr_shared_next(qsbr->shared);
}
// We may be freeing a page belonging to a different thread during a
// stop-the-world event. Find the _PyThreadStateImpl for the page.
_PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
return false;
}
@ -190,7 +208,8 @@ _PyMem_mi_page_reclaimed(mi_page_t *page)
if (page->qsbr_goal != 0) {
if (mi_page_all_free(page)) {
assert(page->qsbr_node.next == NULL);
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET();
_PyThreadStateImpl *tstate = tstate_from_heap(mi_page_heap(page));
assert(tstate == (_PyThreadStateImpl *)_PyThreadState_GET());
page->retire_expire = 0;
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
}