cpython/Python/qsbr.c

/*
 * Implementation of safe memory reclamation scheme using
 * quiescent states.
 *
 * This is dervied from the "GUS" safe memory reclamation technique
 * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs
 * in this code are likely due to the modifications.
 *
 * The original copyright is preserved below.
 *
 * Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "Python.h"
#include "pycore_initconfig.h"      // _PyStatus_NO_MEMORY()
#include "pycore_lock.h"            // PyMutex_Lock()
#include "pycore_qsbr.h"
#include "pycore_pystate.h"         // _PyThreadState_GET()


// Wrap-around safe comparison. This is a holdover from the FreeBSD
// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
// sequence numbers, so wrap-around is unlikely.
#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)

// Starting size of the array of qsbr thread states
#define MIN_ARRAY_SIZE 8

// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing
// the write sequence.
#define QSBR_DEFERRED_LIMIT 10

// Allocate a QSBR thread state from the freelist
static struct _qsbr_thread_state *
qsbr_allocate(struct _qsbr_shared *shared)
{
    struct _qsbr_thread_state *qsbr = shared->freelist;
    if (qsbr == NULL) {
        return NULL;
    }
    shared->freelist = qsbr->freelist_next;
    qsbr->freelist_next = NULL;
    qsbr->shared = shared;
    qsbr->allocated = true;
    return qsbr;
}

// Initialize (or reintialize) the freelist of QSBR thread states
static void
initialize_new_array(struct _qsbr_shared *shared)
{
    for (Py_ssize_t i = 0; i != shared->size; i++) {
        struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;
        if (qsbr->tstate != NULL) {
            // Update the thread state pointer to its QSBR state
            _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)qsbr->tstate;
            tstate->qsbr = qsbr;
        }
        if (!qsbr->allocated) {
            // Push to freelist
            qsbr->freelist_next = shared->freelist;
            shared->freelist = qsbr;
        }
    }
}

// Grow the array of QSBR thread states. Returns 0 on success, -1 on failure.
static int
grow_thread_array(struct _qsbr_shared *shared)
{
    Py_ssize_t new_size = shared->size * 2;
    if (new_size < MIN_ARRAY_SIZE) {
        new_size = MIN_ARRAY_SIZE;
    }

    struct _qsbr_pad *array = PyMem_RawCalloc(new_size, sizeof(*array));
    if (array == NULL) {
        return -1;
    }

    struct _qsbr_pad *old = shared->array;
    if (old != NULL) {
        memcpy(array, shared->array, shared->size * sizeof(*array));
    }

    shared->array = array;
    shared->size = new_size;
    shared->freelist = NULL;
    initialize_new_array(shared);

    PyMem_RawFree(old);
    return 0;
}

uint64_t
_Py_qsbr_advance(struct _qsbr_shared *shared)
{
    // NOTE: with 64-bit sequence numbers, we don't have to worry too much
    // about the wr_seq getting too far ahead of rd_seq, but if we ever use
    // 32-bit sequence numbers, we'll need to be more careful.
    return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR;
}

uint64_t
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr)
{
    if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) {
        return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
    }
    qsbr->deferrals = 0;
    return _Py_qsbr_advance(qsbr->shared);
}

static uint64_t
qsbr_poll_scan(struct _qsbr_shared *shared)
{
    // Synchronize with store in _Py_qsbr_attach(). We need to ensure that
    // the reads from each thread's sequence number are not reordered to see
    // earlier "offline" states.
    _Py_atomic_fence_seq_cst();

    // Compute the minimum sequence number of all attached threads
    uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq);
    struct _qsbr_pad *array = shared->array;
    for (Py_ssize_t i = 0, size = shared->size; i != size; i++) {
        struct _qsbr_thread_state *qsbr = &array[i].qsbr;

        uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq);
        if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) {
            min_seq = seq;
        }
    }

    // Update the shared read sequence
    uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq);
    if (QSBR_LT(rd_seq, min_seq)) {
        // It's okay if the compare-exchange failed: another thread updated it
        (void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq);
        rd_seq = min_seq;
    }

    return rd_seq;
}

bool
_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
{
    assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);

    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
    if (QSBR_LEQ(goal, rd_seq)) {
        return true;
    }

    rd_seq = qsbr_poll_scan(qsbr->shared);
    return QSBR_LEQ(goal, rd_seq);
}

void
_Py_qsbr_attach(struct _qsbr_thread_state *qsbr)
{
    assert(qsbr->seq == 0 && "already attached");

    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
    _Py_atomic_store_uint64(&qsbr->seq, seq);  // needs seq_cst
}

void
_Py_qsbr_detach(struct _qsbr_thread_state *qsbr)
{
    assert(qsbr->seq != 0 && "already detached");

    _Py_atomic_store_uint64_release(&qsbr->seq, QSBR_OFFLINE);
}

Py_ssize_t
_Py_qsbr_reserve(PyInterpreterState *interp)
{
    struct _qsbr_shared *shared = &interp->qsbr;

    PyMutex_Lock(&shared->mutex);
    // Try allocating from our internal freelist
    struct _qsbr_thread_state *qsbr = qsbr_allocate(shared);

    // If there are no free entries, we pause all threads, grow the array,
    // and update the pointers in PyThreadState to entries in the new array.
    if (qsbr == NULL) {
        _PyEval_StopTheWorld(interp);
        if (grow_thread_array(shared) == 0) {
            qsbr = qsbr_allocate(shared);
        }
        _PyEval_StartTheWorld(interp);
    }
    PyMutex_Unlock(&shared->mutex);

    if (qsbr == NULL) {
        return -1;
    }

    // Return an index rather than the pointer because the array may be
    // resized and the pointer invalidated.
    return (struct _qsbr_pad *)qsbr - shared->array;
}

void
_Py_qsbr_register(_PyThreadStateImpl *tstate, PyInterpreterState *interp,
                  Py_ssize_t index)
{
    // Associate the QSBR state with the thread state
    struct _qsbr_shared *shared = &interp->qsbr;

    PyMutex_Lock(&shared->mutex);
    struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr;
    assert(qsbr->allocated && qsbr->tstate == NULL);
    qsbr->tstate = (PyThreadState *)tstate;
    tstate->qsbr = qsbr;
    PyMutex_Unlock(&shared->mutex);
}

void
_Py_qsbr_unregister(_PyThreadStateImpl *tstate)
{
    struct _qsbr_thread_state *qsbr = tstate->qsbr;
    struct _qsbr_shared *shared = qsbr->shared;

    assert(qsbr->seq == 0 && "thread state must be detached");

    PyMutex_Lock(&shared->mutex);
    assert(qsbr->allocated && qsbr->tstate == (PyThreadState *)tstate);
    tstate->qsbr = NULL;
    qsbr->tstate = NULL;
    qsbr->allocated = false;
    qsbr->freelist_next = shared->freelist;
    shared->freelist = qsbr;
    PyMutex_Unlock(&shared->mutex);
}

void
_Py_qsbr_fini(PyInterpreterState *interp)
{
    struct _qsbr_shared *shared = &interp->qsbr;
    PyMem_RawFree(shared->array);
    shared->array = NULL;
    shared->size = 0;
    shared->freelist = NULL;
}

void
_Py_qsbr_after_fork(_PyThreadStateImpl *tstate)
{
    struct _qsbr_thread_state *this_qsbr = tstate->qsbr;
    struct _qsbr_shared *shared = this_qsbr->shared;

    _PyMutex_at_fork_reinit(&shared->mutex);

    for (Py_ssize_t i = 0; i != shared->size; i++) {
        struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;
        if (qsbr != this_qsbr && qsbr->allocated) {
            qsbr->tstate = NULL;
            qsbr->allocated = false;
            qsbr->freelist_next = shared->freelist;
            shared->freelist = qsbr;
        }
    }
}
gh-115103: Implement delayed memory reclamation (QSBR) (#115180) This adds a safe memory reclamation scheme based on FreeBSD's "GUS" and quiescent state based reclamation (QSBR). The API provides a mechanism for callers to detect when it is safe to free memory that may be concurrently accessed by readers. 2024-02-16 15:25:19 -05:00			`/*`
			`* Implementation of safe memory reclamation scheme using`
			`* quiescent states.`
			`*`
			`* This is dervied from the "GUS" safe memory reclamation technique`
			`* in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs`
			`* in this code are likely due to the modifications.`
			`*`
			`* The original copyright is preserved below.`
			`*`
			`* Copyright (c) 2019,2020 Jeffrey Roberson <jeff@FreeBSD.org>`
			`*`
			`* Redistribution and use in source and binary forms, with or without`
			`* modification, are permitted provided that the following conditions`
			`* are met:`
			`* 1. Redistributions of source code must retain the above copyright`
			`* notice unmodified, this list of conditions, and the following`
			`* disclaimer.`
			`* 2. Redistributions in binary form must reproduce the above copyright`
			`* notice, this list of conditions and the following disclaimer in the`
			`* documentation and/or other materials provided with the distribution.`
			`*`
			* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
			`* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES`
			`* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.`
			`* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,`
			`* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT`
			`* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF`
			`* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
			`*/`
			`#include "Python.h"`
			`#include "pycore_initconfig.h" // _PyStatus_NO_MEMORY()`
			`#include "pycore_lock.h" // PyMutex_Lock()`
			`#include "pycore_qsbr.h"`
			`#include "pycore_pystate.h" // _PyThreadState_GET()`


			`// Wrap-around safe comparison. This is a holdover from the FreeBSD`
			`// implementation, which uses 32-bit sequence numbers. We currently use 64-bit`
			`// sequence numbers, so wrap-around is unlikely.`
			`#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)`
			`#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)`

			`// Starting size of the array of qsbr thread states`
			`#define MIN_ARRAY_SIZE 8`

			`// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing`
			`// the write sequence.`
			`#define QSBR_DEFERRED_LIMIT 10`

			`// Allocate a QSBR thread state from the freelist`
			`static struct _qsbr_thread_state *`
			`qsbr_allocate(struct _qsbr_shared *shared)`
			`{`
			`struct _qsbr_thread_state *qsbr = shared->freelist;`
			`if (qsbr == NULL) {`
			`return NULL;`
			`}`
			`shared->freelist = qsbr->freelist_next;`
			`qsbr->freelist_next = NULL;`
			`qsbr->shared = shared;`
			`qsbr->allocated = true;`
			`return qsbr;`
			`}`

			`// Initialize (or reintialize) the freelist of QSBR thread states`
			`static void`
			`initialize_new_array(struct _qsbr_shared *shared)`
			`{`
			`for (Py_ssize_t i = 0; i != shared->size; i++) {`
			`struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;`
			`if (qsbr->tstate != NULL) {`
			`// Update the thread state pointer to its QSBR state`
			`_PyThreadStateImpl tstate = (_PyThreadStateImpl )qsbr->tstate;`
			`tstate->qsbr = qsbr;`
			`}`
			`if (!qsbr->allocated) {`
			`// Push to freelist`
			`qsbr->freelist_next = shared->freelist;`
			`shared->freelist = qsbr;`
			`}`
			`}`
			`}`

			`// Grow the array of QSBR thread states. Returns 0 on success, -1 on failure.`
			`static int`
			`grow_thread_array(struct _qsbr_shared *shared)`
			`{`
			`Py_ssize_t new_size = shared->size * 2;`
			`if (new_size < MIN_ARRAY_SIZE) {`
			`new_size = MIN_ARRAY_SIZE;`
			`}`

			`struct _qsbr_pad array = PyMem_RawCalloc(new_size, sizeof(array));`
			`if (array == NULL) {`
			`return -1;`
			`}`

			`struct _qsbr_pad *old = shared->array;`
			`if (old != NULL) {`
			`memcpy(array, shared->array, shared->size * sizeof(*array));`
			`}`

			`shared->array = array;`
			`shared->size = new_size;`
			`shared->freelist = NULL;`
			`initialize_new_array(shared);`

			`PyMem_RawFree(old);`
			`return 0;`
			`}`

			`uint64_t`
			`_Py_qsbr_advance(struct _qsbr_shared *shared)`
			`{`
			`// NOTE: with 64-bit sequence numbers, we don't have to worry too much`
			`// about the wr_seq getting too far ahead of rd_seq, but if we ever use`
			`// 32-bit sequence numbers, we'll need to be more careful.`
			`return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR;`
			`}`

			`uint64_t`
			`_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr)`
			`{`
			`if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) {`
			`return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;`
			`}`
			`qsbr->deferrals = 0;`
			`return _Py_qsbr_advance(qsbr->shared);`
			`}`

			`static uint64_t`
			`qsbr_poll_scan(struct _qsbr_shared *shared)`
			`{`
			`// Synchronize with store in _Py_qsbr_attach(). We need to ensure that`
			`// the reads from each thread's sequence number are not reordered to see`
			`// earlier "offline" states.`
			`_Py_atomic_fence_seq_cst();`

			`// Compute the minimum sequence number of all attached threads`
			`uint64_t min_seq = _Py_atomic_load_uint64(&shared->wr_seq);`
			`struct _qsbr_pad *array = shared->array;`
			`for (Py_ssize_t i = 0, size = shared->size; i != size; i++) {`
			`struct _qsbr_thread_state *qsbr = &array[i].qsbr;`

			`uint64_t seq = _Py_atomic_load_uint64(&qsbr->seq);`
			`if (seq != QSBR_OFFLINE && QSBR_LT(seq, min_seq)) {`
			`min_seq = seq;`
			`}`
			`}`

			`// Update the shared read sequence`
			`uint64_t rd_seq = _Py_atomic_load_uint64(&shared->rd_seq);`
			`if (QSBR_LT(rd_seq, min_seq)) {`
			`// It's okay if the compare-exchange failed: another thread updated it`
			`(void)_Py_atomic_compare_exchange_uint64(&shared->rd_seq, &rd_seq, min_seq);`
			`rd_seq = min_seq;`
			`}`

			`return rd_seq;`
			`}`

			`bool`
			`_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)`
			`{`
			`assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED);`

			`uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);`
			`if (QSBR_LEQ(goal, rd_seq)) {`
			`return true;`
			`}`

			`rd_seq = qsbr_poll_scan(qsbr->shared);`
			`return QSBR_LEQ(goal, rd_seq);`
			`}`

			`void`
			`_Py_qsbr_attach(struct _qsbr_thread_state *qsbr)`
			`{`
			`assert(qsbr->seq == 0 && "already attached");`

			`uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);`
			`_Py_atomic_store_uint64(&qsbr->seq, seq); // needs seq_cst`
			`}`

			`void`
			`_Py_qsbr_detach(struct _qsbr_thread_state *qsbr)`
			`{`
			`assert(qsbr->seq != 0 && "already detached");`

			`_Py_atomic_store_uint64_release(&qsbr->seq, QSBR_OFFLINE);`
			`}`

			`Py_ssize_t`
			`_Py_qsbr_reserve(PyInterpreterState *interp)`
			`{`
			`struct _qsbr_shared *shared = &interp->qsbr;`

			`PyMutex_Lock(&shared->mutex);`
			`// Try allocating from our internal freelist`
			`struct _qsbr_thread_state *qsbr = qsbr_allocate(shared);`

			`// If there are no free entries, we pause all threads, grow the array,`
			`// and update the pointers in PyThreadState to entries in the new array.`
			`if (qsbr == NULL) {`
			`_PyEval_StopTheWorld(interp);`
			`if (grow_thread_array(shared) == 0) {`
			`qsbr = qsbr_allocate(shared);`
			`}`
			`_PyEval_StartTheWorld(interp);`
			`}`
			`PyMutex_Unlock(&shared->mutex);`

			`if (qsbr == NULL) {`
			`return -1;`
			`}`

			`// Return an index rather than the pointer because the array may be`
			`// resized and the pointer invalidated.`
			`return (struct _qsbr_pad *)qsbr - shared->array;`
			`}`

			`void`
			`_Py_qsbr_register(_PyThreadStateImpl tstate, PyInterpreterState interp,`
			`Py_ssize_t index)`
			`{`
			`// Associate the QSBR state with the thread state`
			`struct _qsbr_shared *shared = &interp->qsbr;`

			`PyMutex_Lock(&shared->mutex);`
			`struct _qsbr_thread_state *qsbr = &interp->qsbr.array[index].qsbr;`
			`assert(qsbr->allocated && qsbr->tstate == NULL);`
			`qsbr->tstate = (PyThreadState *)tstate;`
			`tstate->qsbr = qsbr;`
			`PyMutex_Unlock(&shared->mutex);`
			`}`

			`void`
			`_Py_qsbr_unregister(_PyThreadStateImpl *tstate)`
			`{`
			`struct _qsbr_thread_state *qsbr = tstate->qsbr;`
			`struct _qsbr_shared *shared = qsbr->shared;`

			`assert(qsbr->seq == 0 && "thread state must be detached");`

			`PyMutex_Lock(&shared->mutex);`
			`assert(qsbr->allocated && qsbr->tstate == (PyThreadState *)tstate);`
			`tstate->qsbr = NULL;`
			`qsbr->tstate = NULL;`
			`qsbr->allocated = false;`
			`qsbr->freelist_next = shared->freelist;`
			`shared->freelist = qsbr;`
			`PyMutex_Unlock(&shared->mutex);`
			`}`

			`void`
			`_Py_qsbr_fini(PyInterpreterState *interp)`
			`{`
			`struct _qsbr_shared *shared = &interp->qsbr;`
			`PyMem_RawFree(shared->array);`
			`shared->array = NULL;`
			`shared->size = 0;`
			`shared->freelist = NULL;`
			`}`

			`void`
			`_Py_qsbr_after_fork(_PyThreadStateImpl *tstate)`
			`{`
			`struct _qsbr_thread_state *this_qsbr = tstate->qsbr;`
			`struct _qsbr_shared *shared = this_qsbr->shared;`

			`_PyMutex_at_fork_reinit(&shared->mutex);`

			`for (Py_ssize_t i = 0; i != shared->size; i++) {`
			`struct _qsbr_thread_state *qsbr = &shared->array[i].qsbr;`
			`if (qsbr != this_qsbr && qsbr->allocated) {`
			`qsbr->tstate = NULL;`
			`qsbr->allocated = false;`
			`qsbr->freelist_next = shared->freelist;`
			`shared->freelist = qsbr;`
			`}`
			`}`
			`}`