| 
									
										
										
										
											2024-02-16 15:25:19 -05:00
// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
// the free-threaded build to safely reclaim memory when there may be
// concurrent accesses.
//
// Many operations in the free-threaded build are protected by locks. However,
// in some cases, we want to allow reads to happen concurrently with updates.
// In this case, we need to delay freeing ("reclaiming") any memory that may be
// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
 | 
					
						
							|  |  |  | #ifndef Py_INTERNAL_QSBR_H
 | 
					
						
							|  |  |  | #define Py_INTERNAL_QSBR_H
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <stdbool.h>
 | 
					
						
							|  |  |  | #include <stdint.h>
 | 
					
						
							|  |  |  | #include "pycore_lock.h"        // PyMutex
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __cplusplus
 | 
					
						
							|  |  |  | extern "C" { | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifndef Py_BUILD_CORE
 | 
					
						
							|  |  |  | #  error "this header requires Py_BUILD_CORE define"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // The shared write sequence is always odd and incremented by two. Detached
 | 
					
						
							|  |  |  | // threads are indicated by a read sequence of zero. This avoids collisions
 | 
					
						
							|  |  |  | // between the offline state and any valid sequence number even if the
 | 
					
						
							|  |  |  | // sequences numbers wrap around.
 | 
					
						
							|  |  |  | #define QSBR_OFFLINE 0
 | 
					
						
							|  |  |  | #define QSBR_INITIAL 1
 | 
					
						
							|  |  |  | #define QSBR_INCR    2
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-06 09:42:11 -05:00
										 |  |  | // Wrap-around safe comparison. This is a holdover from the FreeBSD
 | 
					
						
							|  |  |  | // implementation, which uses 32-bit sequence numbers. We currently use 64-bit
 | 
					
						
							|  |  |  | // sequence numbers, so wrap-around is unlikely.
 | 
					
						
							|  |  |  | #define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
 | 
					
						
							|  |  |  | #define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-16 15:25:19 -05:00
										 |  |  | struct _qsbr_shared; | 
					
						
							|  |  |  | struct _PyThreadStateImpl;  // forward declare to avoid circular dependency
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Per-thread state
 | 
					
						
							|  |  |  | struct _qsbr_thread_state { | 
					
						
							|  |  |  |     // Last observed write sequence (or 0 if detached)
 | 
					
						
							|  |  |  |     uint64_t seq; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Shared (per-interpreter) QSBR state
 | 
					
						
							|  |  |  |     struct _qsbr_shared *shared; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Thread state (or NULL)
 | 
					
						
							|  |  |  |     PyThreadState *tstate; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Used to defer advancing write sequence a fixed number of times
 | 
					
						
							|  |  |  |     int deferrals; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Is this thread state allocated?
 | 
					
						
							|  |  |  |     bool allocated; | 
					
						
							|  |  |  |     struct _qsbr_thread_state *freelist_next; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Padding to avoid false sharing
 | 
					
						
							|  |  |  | struct _qsbr_pad { | 
					
						
							|  |  |  |     struct _qsbr_thread_state qsbr; | 
					
						
							|  |  |  |     char __padding[64 - sizeof(struct _qsbr_thread_state)]; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Per-interpreter state
 | 
					
						
							|  |  |  | struct _qsbr_shared { | 
					
						
							|  |  |  |     // Write sequence: always odd, incremented by two
 | 
					
						
							|  |  |  |     uint64_t wr_seq; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Minimum observed read sequence of all QSBR thread states
 | 
					
						
							|  |  |  |     uint64_t rd_seq; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Array of QSBR thread states.
 | 
					
						
							|  |  |  |     struct _qsbr_pad *array; | 
					
						
							|  |  |  |     Py_ssize_t size; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Freelist of unused _qsbr_thread_states (protected by mutex)
 | 
					
						
							|  |  |  |     PyMutex mutex; | 
					
						
							|  |  |  |     struct _qsbr_thread_state *freelist; | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline uint64_t | 
					
						
							|  |  |  | _Py_qsbr_shared_current(struct _qsbr_shared *shared) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return _Py_atomic_load_uint64_acquire(&shared->wr_seq); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Reports a quiescent state: the caller no longer holds any pointer to shared
 | 
					
						
							|  |  |  | // data not protected by locks or reference counts.
 | 
					
						
							|  |  |  | static inline void | 
					
						
							|  |  |  | _Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint64_t seq = _Py_qsbr_shared_current(qsbr->shared); | 
					
						
							|  |  |  |     _Py_atomic_store_uint64_release(&qsbr->seq, seq); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-06 09:42:11 -05:00
										 |  |  | // Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
 | 
					
						
							|  |  |  | // but does not perform a scan of threads.
 | 
					
						
							|  |  |  | static inline bool | 
					
						
							|  |  |  | _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq); | 
					
						
							|  |  |  |     return QSBR_LEQ(goal, rd_seq); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-16 15:25:19 -05:00
										 |  |  | // Advance the write sequence and return the new goal. This should be called
 | 
					
						
							|  |  |  | // after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
 | 
					
						
							|  |  |  | // determine when it is safe to reclaim (free) the memory.
 | 
					
						
							|  |  |  | extern uint64_t | 
					
						
							|  |  |  | _Py_qsbr_advance(struct _qsbr_shared *shared); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Batches requests to advance the write sequence. This advances the write
 | 
					
						
							|  |  |  | // sequence every N calls, which reduces overhead but increases time to
 | 
					
						
							|  |  |  | // reclamation. Returns the new goal.
 | 
					
						
							|  |  |  | extern uint64_t | 
					
						
							|  |  |  | _Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Have the read sequences advanced to the given goal? If this returns true,
 | 
					
						
							|  |  |  | // it safe to reclaim any memory tagged with the goal (or earlier goal).
 | 
					
						
							|  |  |  | extern bool | 
					
						
							|  |  |  | _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Called when thread attaches to interpreter
 | 
					
						
							|  |  |  | extern void | 
					
						
							|  |  |  | _Py_qsbr_attach(struct _qsbr_thread_state *qsbr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Called when thread detaches from interpreter
 | 
					
						
							|  |  |  | extern void | 
					
						
							|  |  |  | _Py_qsbr_detach(struct _qsbr_thread_state *qsbr); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Reserves (allocates) a QSBR state and returns its index.
 | 
					
						
							|  |  |  | extern Py_ssize_t | 
					
						
							|  |  |  | _Py_qsbr_reserve(PyInterpreterState *interp); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Associates a PyThreadState with the QSBR state at the given index
 | 
					
						
							|  |  |  | extern void | 
					
						
							|  |  |  | _Py_qsbr_register(struct _PyThreadStateImpl *tstate, | 
					
						
							|  |  |  |                   PyInterpreterState *interp, Py_ssize_t index); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
 | 
					
						
							|  |  |  | extern void | 
					
						
							|  |  |  | _Py_qsbr_unregister(struct _PyThreadStateImpl *tstate); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | extern void | 
					
						
							|  |  |  | _Py_qsbr_fini(PyInterpreterState *interp); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | extern void | 
					
						
							|  |  |  | _Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef __cplusplus
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | #endif   /* !Py_INTERNAL_QSBR_H */
 |