cpython/Include/cpython/pystats.h
Mark Shannon 469f191a85
GH-135379: Top of stack caching for the JIT. (GH-135465)
Uses three registers to cache values at the top of the evaluation stack.
This significantly reduces memory traffic for smaller, more common uops.
2025-12-11 10:32:52 +00:00

235 lines
7.3 KiB
C

// Statistics on Python performance.
//
// API:
//
// - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF()
// and Py_DECREF().
// - _PyStats_GET()
//
// Functions of the sys module:
//
// - sys._stats_on()
// - sys._stats_off()
// - sys._stats_clear()
// - sys._stats_dump()
//
// Python must be built with ./configure --enable-pystats to define the
// _PyStats_GET() macro.
//
// Define _PY_INTERPRETER macro to increment interpreter_increfs and
// interpreter_decrefs. Otherwise, increment increfs and decrefs.
//
// The number of incref operations counted by `increfs` and
// `interpreter_increfs` is the number of increment operations, which is
// not equal to the total of all reference counts. A single increment
// operation may increase the reference count of an object by more than
// one. For example, see `_Py_RefcntAdd`.
// Guard against direct inclusion: compilation stops with an error unless
// Py_CPYTHON_PYSTATS_H has already been defined by the file including this one.
#ifndef Py_CPYTHON_PYSTATS_H
# error "this header file must not be included directly"
#endif
// Per-uop arrays in this header are declared with PYSTATS_MAX_UOP_ID + 1
// elements, so their valid indices run from 0 through PYSTATS_MAX_UOP_ID.
#define PYSTATS_MAX_UOP_ID 2000
// Number of elements in SpecializationStats.failure_kinds.
#define SPECIALIZATION_FAILURE_KINDS 60
/* Stats for determining who is calling PyEval_EvalFrame */
// Every EVAL_CALL_* value below is smaller than EVAL_CALL_KINDS, which is the
// length of CallStats.eval_calls; presumably each one is an index into that
// array (confirm against the code that increments it).
#define EVAL_CALL_TOTAL 0
#define EVAL_CALL_VECTOR 1
#define EVAL_CALL_GENERATOR 2
#define EVAL_CALL_LEGACY 3
#define EVAL_CALL_FUNCTION_VECTORCALL 4
#define EVAL_CALL_BUILD_CLASS 5
#define EVAL_CALL_SLOT 6
#define EVAL_CALL_FUNCTION_EX 7
#define EVAL_CALL_API 8
#define EVAL_CALL_METHOD 9
// Array length used for CallStats.eval_calls.
#define EVAL_CALL_KINDS 10
// Specialization counters. One instance is embedded in every OpcodeStats.
struct _specialization_stats {
    uint64_t success;
    uint64_t failure;
    uint64_t hit;
    uint64_t deferred;
    uint64_t miss;
    uint64_t deopt;
    // One counter per failure kind (SPECIALIZATION_FAILURE_KINDS slots).
    uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS];
};
typedef struct _specialization_stats SpecializationStats;
// Counters kept for a single opcode; PyStats holds an array of 256 of these.
struct _opcode_stats {
    SpecializationStats specialization;
    uint64_t execution_count;
    // 256 slots, the same count as PyStats.opcode_stats; presumably indexed
    // by the opcode paired with this one (confirm against the incrementing code).
    uint64_t pair_count[256];
};
typedef struct _opcode_stats OpcodeStats;
// Counters related to calls and frame creation.
struct _call_stats {
    uint64_t inlined_py_calls;
    uint64_t pyeval_calls;
    uint64_t frames_pushed;
    uint64_t frame_objects_created;
    // EVAL_CALL_KINDS slots; see the EVAL_CALL_* constants.
    uint64_t eval_calls[EVAL_CALL_KINDS];
};
typedef struct _call_stats CallStats;
// Object-level counters: refcount operations, allocation, freelists,
// dict materialization and the type cache.
struct _object_stats {
    // Refcount operations. The interpreter_* variants are the ones bumped
    // when _PY_INTERPRETER is defined (see the _Py_*_STAT_INC macros).
    uint64_t increfs;
    uint64_t decrefs;
    uint64_t interpreter_increfs;
    uint64_t interpreter_decrefs;
    uint64_t immortal_increfs;
    uint64_t immortal_decrefs;
    uint64_t interpreter_immortal_increfs;
    uint64_t interpreter_immortal_decrefs;

    // Allocation and deallocation.
    uint64_t allocations;
    uint64_t allocations512;
    uint64_t allocations4k;
    uint64_t allocations_big;
    uint64_t frees;
    uint64_t to_freelist;
    uint64_t from_freelist;

    // Inline values and dict materialization.
    uint64_t inline_values;
    uint64_t dict_materialized_on_request;
    uint64_t dict_materialized_new_key;
    uint64_t dict_materialized_too_big;
    uint64_t dict_materialized_str_subclass;

    // Type cache.
    uint64_t type_cache_hits;
    uint64_t type_cache_misses;
    uint64_t type_cache_dunder_hits;
    uint64_t type_cache_dunder_misses;
    uint64_t type_cache_collisions;

    /* Temporary value used during GC */
    uint64_t object_visits;
};
typedef struct _object_stats ObjectStats;
// Garbage-collector counters; PyStats keeps an array of these (gc_stats).
struct _gc_stats {
    uint64_t collections;
    uint64_t object_visits;
    uint64_t objects_collected;
    uint64_t objects_transitively_reachable;
    uint64_t objects_not_transitively_reachable;
};
typedef struct _gc_stats GCStats;
#if defined(Py_GIL_DISABLED)
// Counters that exist only in the free-threaded build.
struct _ft_stats {
    // How often the interpreter had to spin or park while trying to acquire
    // a mutex.
    uint64_t mutex_sleeps;
    // How often the QSBR mechanism polled (computed the read sequence value).
    uint64_t qsbr_polls;
    // How often the stop-the-world mechanism was used.
    uint64_t world_stops;
};
typedef struct _ft_stats FTStats;
#endif
// Counters kept for a single uop; OptimizationStats.opcode is an array of these.
struct _uop_stats {
    uint64_t execution_count;
    uint64_t miss;
    // PYSTATS_MAX_UOP_ID + 1 slots, i.e. indices 0..PYSTATS_MAX_UOP_ID.
    uint64_t pair_count[PYSTATS_MAX_UOP_ID + 1];
};
typedef struct _uop_stats UOpStats;
// Number of buckets in each histogram array of OptimizationStats.
#define _Py_UOP_HIST_SIZE 32

// Counters for trace creation, trace execution, the optimizer and JIT memory.
struct _optimization_stats {
    // Trace creation and execution.
    uint64_t attempts;
    uint64_t traces_created;
    uint64_t traces_executed;
    uint64_t uops_executed;
    uint64_t trace_stack_overflow;
    uint64_t trace_stack_underflow;
    uint64_t trace_too_long;
    uint64_t trace_too_short;
    uint64_t inner_loop;
    uint64_t recursive_call;
    uint64_t low_confidence;
    uint64_t unknown_callee;
    uint64_t trace_immediately_deopts;
    uint64_t executors_invalidated;

    // Per-uop counters: PYSTATS_MAX_UOP_ID + 1 entries.
    UOpStats opcode[PYSTATS_MAX_UOP_ID + 1];
    // 256 slots, the same count as PyStats.opcode_stats.
    uint64_t unsupported_opcode[256];

    // Histograms, _Py_UOP_HIST_SIZE buckets each.
    uint64_t trace_length_hist[_Py_UOP_HIST_SIZE];
    uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE];
    uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE];

    // Optimizer outcomes.
    uint64_t optimizer_attempts;
    uint64_t optimizer_successes;
    uint64_t optimizer_contradiction;
    uint64_t optimizer_frame_overflow;
    uint64_t optimizer_failure_reason_no_memory;
    uint64_t remove_globals_builtins_changed;
    uint64_t remove_globals_incorrect_keys;
    // PYSTATS_MAX_UOP_ID + 1 slots.
    uint64_t error_in_opcode[PYSTATS_MAX_UOP_ID + 1];

    // JIT memory stats
    uint64_t jit_total_memory_size;
    uint64_t jit_code_size;
    uint64_t jit_trampoline_size;
    uint64_t jit_data_size;
    uint64_t jit_padding_size;
    uint64_t jit_freed_memory_size;
    uint64_t trace_total_memory_hist[_Py_UOP_HIST_SIZE];
};
typedef struct _optimization_stats OptimizationStats;
// Counters for rare events.
struct _rare_event_stats {
    /* An object's class was set: obj.__class__ = ... */
    uint64_t set_class;
    /* The bases of a class were set: cls.__bases__ = ... */
    uint64_t set_bases;
    /* The PEP 523 frame eval function was set via
       _PyInterpreterState_SetFrameEvalFunc() */
    uint64_t set_eval_frame_func;
    /* The builtins were modified: __builtins__.__dict__[var] = ... */
    uint64_t builtin_dict;
    /* A function was modified, e.g. func.__defaults__ = ..., etc. */
    uint64_t func_modification;
    /* A dict that is being watched was modified */
    uint64_t watched_dict_modification;
    /* NOTE(review): undocumented in the original; presumably the same event
       for a watched globals dict -- confirm. */
    uint64_t watched_globals_modification;
};
typedef struct _rare_event_stats RareEventStats;
// Top-level statistics block: the type returned by _PyStats_GET() and
// updated through the _Py_STATS_EXPR family of macros.
struct _stats {
    // 256 entries of per-opcode counters.
    OpcodeStats opcode_stats[256];
    CallStats call_stats;
    ObjectStats object_stats;
    OptimizationStats optimization_stats;
#if defined(Py_GIL_DISABLED)
    // Present only in the free-threaded build.
    FTStats ft_stats;
#endif
    RareEventStats rare_event_stats;
    GCStats gc_stats[3];  // must match NUM_GENERATIONS
};
typedef struct _stats PyStats;
// Export for most shared extensions
// Returns a PyStats pointer. The _Py_STATS_EXPR and _Py_STATS_COND_EXPR
// macros check the result against NULL and record nothing in that case.
PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void);
// _PyStats_GET names the accessor used by the stats macros: the inline
// _PyThreadState_GetStatsFast when HAVE_THREAD_LOCAL is defined and
// Py_BUILD_CORE_MODULE is not, otherwise the exported _PyStats_GetLocal.
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
// use inline function version defined in cpython/pystate.h
// Only declared here; the definition must be visible wherever the macro is used.
static inline PyStats *_PyThreadState_GetStatsFast(void);
#define _PyStats_GET _PyThreadState_GetStatsFast
#else
#define _PyStats_GET _PyStats_GetLocal
#endif
// Apply `expr` as a member expression of the current PyStats block, e.g.
// _Py_STATS_EXPR(object_stats.increfs++). Does nothing when _PyStats_GET()
// returns NULL.
//
// `expr` is pasted into the scope of the local pointer, so the pointer has a
// deliberately unusual name: with a short name such as `s`, a caller variable
// called `s` used inside `expr` (for example as an array index) would be
// shadowed by the stats pointer.
#define _Py_STATS_EXPR(expr) \
    do { \
        PyStats *_py_stats_ptr = _PyStats_GET(); \
        if (_py_stats_ptr != NULL) { \
            _py_stats_ptr->expr; \
        } \
    } while (0)
// Like _Py_STATS_EXPR, but `expr` is applied only when `cond` is also true.
// `cond` is evaluated only after _PyStats_GET() has returned non-NULL.
//
// Both `cond` and `expr` are pasted into the scope of the local pointer, so
// the pointer has a deliberately unusual name: with a short name such as `s`,
// a caller's `s` mentioned in `cond` would silently refer to the stats
// pointer instead.
#define _Py_STATS_COND_EXPR(cond, expr) \
    do { \
        PyStats *_py_stats_ptr = _PyStats_GET(); \
        if (_py_stats_ptr != NULL && (cond)) { \
            _py_stats_ptr->expr; \
        } \
    } while (0)
// Refcount-operation counters used by Py_INCREF() and Py_DECREF().
// When _PY_INTERPRETER is defined at the point this header is processed, the
// macros bump the interpreter_* fields of object_stats; otherwise they bump
// the plain fields.
#ifndef _PY_INTERPRETER
#  define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++)
#  define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++)
#  define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++)
#  define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++)
#else
#  define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++)
#  define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++)
#  define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++)
#  define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++)
#endif