mirror of
				https://github.com/python/cpython.git
				synced 2025-10-23 09:53:47 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			392 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			392 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef Py_INTERNAL_OPTIMIZER_H
 | |
| #define Py_INTERNAL_OPTIMIZER_H
 | |
| #ifdef __cplusplus
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
| #ifndef Py_BUILD_CORE
 | |
| #  error "this header requires Py_BUILD_CORE define"
 | |
| #endif
 | |
| 
 | |
| #include "pycore_typedefs.h"      // _PyInterpreterFrame
 | |
| #include "pycore_uop_ids.h"
 | |
| #include "pycore_stackref.h"      // _PyStackRef
 | |
| #include <stdbool.h>
 | |
| 
 | |
| 
 | |
/* Pair of links to neighbouring executor objects.  _PyVMData embeds one of
 * these as its `links` member. */
typedef struct _PyExecutorLinkListNode {
    struct _PyExecutorObject *next;      // Link to the following executor.
    struct _PyExecutorObject *previous;  // Link to the preceding executor.
} _PyExecutorLinkListNode;
 | |
| 
 | |
| 
 | |
/* Bloom filter with m = 256 bits, stored as 8 words of 32 bits each.
 * https://en.wikipedia.org/wiki/Bloom_filter */
#define _Py_BLOOM_FILTER_WORDS 8

typedef struct {
    uint32_t bits[_Py_BLOOM_FILTER_WORDS];  // 8 * 32 = 256 filter bits.
} _PyBloomFilter;
 | |
| 
 | |
| typedef struct {
 | |
|     uint8_t opcode;
 | |
|     uint8_t oparg;
 | |
|     uint8_t valid:1;
 | |
|     uint8_t linked:1;
 | |
|     uint8_t chain_depth:6;  // Must be big enough for MAX_CHAIN_DEPTH - 1.
 | |
|     bool warm;
 | |
|     int index;           // Index of ENTER_EXECUTOR (if code isn't NULL, below).
 | |
|     _PyBloomFilter bloom;
 | |
|     _PyExecutorLinkListNode links;
 | |
|     PyCodeObject *code;  // Weak (NULL if no corresponding ENTER_EXECUTOR).
 | |
| } _PyVMData;
 | |
| 
 | |
/* A single micro-op.
 *
 * Depending on the format, the 32 bits between the oparg and operand0 are:
 * UOP_FORMAT_TARGET:
 *    uint32_t target;
 * UOP_FORMAT_JUMP
 *    uint16_t jump_target;
 *    uint16_t error_target;
 *
 * Both layouts share storage through the anonymous union below, so only the
 * members matching `format` are meaningful for a given instruction.
 */
typedef struct {
    uint16_t opcode:15;
    uint16_t format:1;   // UOP_FORMAT_TARGET or UOP_FORMAT_JUMP.
    uint16_t oparg;
    union {
        uint32_t target;
        struct {
            uint16_t jump_target;
            uint16_t error_target;
        };
    };
    uint64_t operand0;  // A cache entry
    uint64_t operand1;
#ifdef Py_STATS
    uint64_t execution_count;  // Only present in Py_STATS builds.
#endif
} _PyUOpInstruction;
 | |
| 
 | |
| typedef struct _PyExitData {
 | |
|     uint32_t target;
 | |
|     uint16_t index;
 | |
|     _Py_BackoffCounter temperature;
 | |
|     struct _PyExecutorObject *executor;
 | |
| } _PyExitData;
 | |
| 
 | |
| typedef struct _PyExecutorObject {
 | |
|     PyObject_VAR_HEAD
 | |
|     const _PyUOpInstruction *trace;
 | |
|     _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */
 | |
|     uint32_t exit_count;
 | |
|     uint32_t code_size;
 | |
|     size_t jit_size;
 | |
|     void *jit_code;
 | |
|     _PyExitData exits[1];
 | |
| } _PyExecutorObject;
 | |
| 
 | |
/* Size of the pending deletion list that triggers a scan.  The scan frees
 * any executors that aren't executing, i.e. any that aren't a thread's
 * current_executor. */
#define EXECUTOR_DELETE_LIST_MAX 100
 | |
| 
 | |
| // Export for '_opcode' shared extension (JIT compiler).
 | |
| PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset);
 | |
| 
 | |
| void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
 | |
| void _Py_ExecutorDetach(_PyExecutorObject *);
 | |
| void _Py_BloomFilter_Init(_PyBloomFilter *);
 | |
| void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
 | |
| PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
 | |
| 
 | |
// Upper bounds on tolerated modifications of builtins / globals.
#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3
#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6
 | |
| 
 | |
/* Executor invalidation entry points.  They are real functions only when
 * _Py_TIER2 is defined; otherwise each call expands to a no-op expression
 * and its arguments are not evaluated. */
#ifdef _Py_TIER2
PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);

#else
#  define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
#  define _Py_Executors_InvalidateAll(A, B) ((void)0)
#  define _Py_Executors_InvalidateCold(A) ((void)0)

#endif
 | |
| 
 | |
// Executor invalidation is triggered once trace_run_counter is greater
// than this threshold.
#define JIT_CLEANUP_THRESHOLD 100000

// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 800

// Used below to derive MAX_ABSTRACT_FRAME_DEPTH.
#define TRACE_STACK_SIZE 5
 | |
| 
 | |
| int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,
 | |
|     _PyUOpInstruction *trace, int trace_len, int curr_stackentries,
 | |
|     _PyBloomFilter *dependencies);
 | |
| 
 | |
| extern PyTypeObject _PyUOpExecutor_Type;
 | |
| 
 | |
| 
 | |
// Values of the one-bit _PyUOpInstruction.format field.
#define UOP_FORMAT_TARGET 0
#define UOP_FORMAT_JUMP 1
 | |
| 
 | |
| static inline uint32_t uop_get_target(const _PyUOpInstruction *inst)
 | |
| {
 | |
|     assert(inst->format == UOP_FORMAT_TARGET);
 | |
|     return inst->target;
 | |
| }
 | |
| 
 | |
| static inline uint16_t uop_get_jump_target(const _PyUOpInstruction *inst)
 | |
| {
 | |
|     assert(inst->format == UOP_FORMAT_JUMP);
 | |
|     return inst->jump_target;
 | |
| }
 | |
| 
 | |
| static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst)
 | |
| {
 | |
|     assert(inst->format != UOP_FORMAT_TARGET);
 | |
|     return inst->error_target;
 | |
| }
 | |
| 
 | |
// Holds locals, stack, locals, stack ... co_consts (in that order)
#define MAX_ABSTRACT_INTERP_SIZE 4096

// Number of JitOptSymbol slots in ty_arena: five per projected trace entry.
#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5)

// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH())
#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2)

// The maximum number of side exits that we can take before requiring forward
// progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this
// is the "maximum amount of polymorphism" that an isolated trace tree can
// handle before rejoining the rest of the program.
#define MAX_CHAIN_DEPTH 4
 | |
| 
 | |
| /* Symbols */
 | |
| /* See explanation in optimizer_symbols.c */
 | |
| 
 | |
| 
 | |
/* Discriminator stored in the `tag` byte of every JitOptSymbol variant.
 * Values start at 1; 0 is not assigned to any tag. */
typedef enum _JitSymType {
    JIT_SYM_UNKNOWN_TAG = 1,
    JIT_SYM_NULL_TAG = 2,
    JIT_SYM_NON_NULL_TAG = 3,
    JIT_SYM_BOTTOM_TAG = 4,
    JIT_SYM_TYPE_VERSION_TAG = 5,
    JIT_SYM_KNOWN_CLASS_TAG = 6,
    JIT_SYM_KNOWN_VALUE_TAG = 7,
    JIT_SYM_TUPLE_TAG = 8,
    JIT_SYM_TRUTHINESS_TAG = 9,
    JIT_SYM_COMPACT_INT = 10,
} JitSymType;
 | |
| 
 | |
| typedef struct _jit_opt_known_class {
 | |
|     uint8_t tag;
 | |
|     uint32_t version;
 | |
|     PyTypeObject *type;
 | |
| } JitOptKnownClass;
 | |
| 
 | |
/* Symbol variant holding only a 32-bit version. */
typedef struct _jit_opt_known_version {
    uint8_t tag;       // Leading tag byte, as in every symbol variant.
    uint32_t version;
} JitOptKnownVersion;
 | |
| 
 | |
| typedef struct _jit_opt_known_value {
 | |
|     uint8_t tag;
 | |
|     PyObject *value;
 | |
| } JitOptKnownValue;
 | |
| 
 | |
// Capacity of JitOptTuple.items.
#define MAX_SYMBOLIC_TUPLE_SIZE 7

/* Symbol variant for a tuple: a length plus up to MAX_SYMBOLIC_TUPLE_SIZE
 * 16-bit item entries. */
typedef struct _jit_opt_tuple {
    uint8_t tag;      // Leading tag byte, as in every symbol variant.
    uint8_t length;
    uint16_t items[MAX_SYMBOLIC_TUPLE_SIZE];
} JitOptTuple;
 | |
| 
 | |
/* Symbol variant for a truthiness result: a 16-bit `value` entry plus an
 * `invert` flag. */
typedef struct {
    uint8_t tag;     // Leading tag byte, as in every symbol variant.
    bool invert;
    uint16_t value;
} JitOptTruthiness;
 | |
| 
 | |
/* Symbol variant that carries nothing but its tag. */
typedef struct {
    uint8_t tag;
} JitOptCompactInt;
 | |
| 
 | |
| typedef union _jit_opt_symbol {
 | |
|     uint8_t tag;
 | |
|     JitOptKnownClass cls;
 | |
|     JitOptKnownValue value;
 | |
|     JitOptKnownVersion version;
 | |
|     JitOptTuple tuple;
 | |
|     JitOptTruthiness truthiness;
 | |
|     JitOptCompactInt compact;
 | |
| } JitOptSymbol;
 | |
| 
 | |
| 
 | |
// This mimics the _PyStackRef API
typedef union {
    uintptr_t bits;  // Symbol pointer, with REF_IS_BORROWED in the low bit.
} JitOptRef;

// Low-bit flag stored in JitOptRef.bits.
#define REF_IS_BORROWED 1

// Clear the borrowed flag and reinterpret the remaining bits as a symbol
// pointer.  The mask is computed in uintptr_t so that it does not depend on
// how a negative int is widened.
#define JIT_BITS_TO_PTR_MASKED(REF) \
    ((JitOptSymbol *)(((REF).bits) & ~(uintptr_t)REF_IS_BORROWED))
 | |
| 
 | |
| static inline JitOptSymbol *
 | |
| PyJitRef_Unwrap(JitOptRef ref)
 | |
| {
 | |
|     return JIT_BITS_TO_PTR_MASKED(ref);
 | |
| }
 | |
| 
 | |
| bool _Py_uop_symbol_is_immortal(JitOptSymbol *sym);
 | |
| 
 | |
| 
 | |
| static inline JitOptRef
 | |
| PyJitRef_Wrap(JitOptSymbol *sym)
 | |
| {
 | |
|     return (JitOptRef){.bits=(uintptr_t)sym};
 | |
| }
 | |
| 
 | |
| static inline JitOptRef
 | |
| PyJitRef_StripReferenceInfo(JitOptRef ref)
 | |
| {
 | |
|     return PyJitRef_Wrap(PyJitRef_Unwrap(ref));
 | |
| }
 | |
| 
 | |
| static inline JitOptRef
 | |
| PyJitRef_Borrow(JitOptRef ref)
 | |
| {
 | |
|     return (JitOptRef){ .bits = ref.bits | REF_IS_BORROWED };
 | |
| }
 | |
| 
 | |
| static const JitOptRef PyJitRef_NULL = {.bits = REF_IS_BORROWED};
 | |
| 
 | |
| static inline bool
 | |
| PyJitRef_IsNull(JitOptRef ref)
 | |
| {
 | |
|     return ref.bits == PyJitRef_NULL.bits;
 | |
| }
 | |
| 
 | |
| static inline int
 | |
| PyJitRef_IsBorrowed(JitOptRef ref)
 | |
| {
 | |
|     return (ref.bits & REF_IS_BORROWED) == REF_IS_BORROWED;
 | |
| }
 | |
| 
 | |
| struct _Py_UOpsAbstractFrame {
 | |
|     // Max stacklen
 | |
|     int stack_len;
 | |
|     int locals_len;
 | |
| 
 | |
|     JitOptRef *stack_pointer;
 | |
|     JitOptRef *stack;
 | |
|     JitOptRef *locals;
 | |
| };
 | |
| 
 | |
| typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame;
 | |
| 
 | |
| typedef struct ty_arena {
 | |
|     int ty_curr_number;
 | |
|     int ty_max_number;
 | |
|     JitOptSymbol arena[TY_ARENA_SIZE];
 | |
| } ty_arena;
 | |
| 
 | |
| typedef struct _JitOptContext {
 | |
|     char done;
 | |
|     char out_of_space;
 | |
|     bool contradiction;
 | |
|     // The current "executing" frame.
 | |
|     _Py_UOpsAbstractFrame *frame;
 | |
|     _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH];
 | |
|     int curr_frame_depth;
 | |
| 
 | |
|     // Arena for the symbolic types.
 | |
|     ty_arena t_arena;
 | |
| 
 | |
|     JitOptRef *n_consumed;
 | |
|     JitOptRef *limit;
 | |
|     JitOptRef locals_and_stack[MAX_ABSTRACT_INTERP_SIZE];
 | |
| } JitOptContext;
 | |
| 
 | |
| extern bool _Py_uop_sym_is_null(JitOptRef sym);
 | |
| extern bool _Py_uop_sym_is_not_null(JitOptRef sym);
 | |
| extern bool _Py_uop_sym_is_const(JitOptContext *ctx, JitOptRef sym);
 | |
| extern PyObject *_Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef sym);
 | |
| extern JitOptRef _Py_uop_sym_new_unknown(JitOptContext *ctx);
 | |
| extern JitOptRef _Py_uop_sym_new_not_null(JitOptContext *ctx);
 | |
| extern JitOptRef _Py_uop_sym_new_type(
 | |
|     JitOptContext *ctx, PyTypeObject *typ);
 | |
| 
 | |
| extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val);
 | |
| extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val);
 | |
| bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym);
 | |
| _PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym);
 | |
| extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx);
 | |
| extern bool _Py_uop_sym_has_type(JitOptRef sym);
 | |
| extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ);
 | |
| extern bool _Py_uop_sym_matches_type_version(JitOptRef sym, unsigned int version);
 | |
| extern void _Py_uop_sym_set_null(JitOptContext *ctx, JitOptRef sym);
 | |
| extern void _Py_uop_sym_set_non_null(JitOptContext *ctx, JitOptRef sym);
 | |
| extern void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef sym, PyTypeObject *typ);
 | |
| extern bool _Py_uop_sym_set_type_version(JitOptContext *ctx, JitOptRef sym, unsigned int version);
 | |
| extern void _Py_uop_sym_set_const(JitOptContext *ctx, JitOptRef sym, PyObject *const_val);
 | |
| extern bool _Py_uop_sym_is_bottom(JitOptRef sym);
 | |
| extern int _Py_uop_sym_truthiness(JitOptContext *ctx, JitOptRef sym);
 | |
| extern PyTypeObject *_Py_uop_sym_get_type(JitOptRef sym);
 | |
| extern JitOptRef _Py_uop_sym_new_tuple(JitOptContext *ctx, int size, JitOptRef *args);
 | |
| extern JitOptRef _Py_uop_sym_tuple_getitem(JitOptContext *ctx, JitOptRef sym, Py_ssize_t item);
 | |
| extern Py_ssize_t _Py_uop_sym_tuple_length(JitOptRef sym);
 | |
| extern JitOptRef _Py_uop_sym_new_truthiness(JitOptContext *ctx, JitOptRef value, bool truthy);
 | |
| extern bool _Py_uop_sym_is_compact_int(JitOptRef sym);
 | |
| extern JitOptRef _Py_uop_sym_new_compact_int(JitOptContext *ctx);
 | |
| extern void _Py_uop_sym_set_compact_int(JitOptContext *ctx,  JitOptRef sym);
 | |
| 
 | |
| extern void _Py_uop_abstractcontext_init(JitOptContext *ctx);
 | |
| extern void _Py_uop_abstractcontext_fini(JitOptContext *ctx);
 | |
| 
 | |
| extern _Py_UOpsAbstractFrame *_Py_uop_frame_new(
 | |
|     JitOptContext *ctx,
 | |
|     PyCodeObject *co,
 | |
|     int curr_stackentries,
 | |
|     JitOptRef *args,
 | |
|     int arg_len);
 | |
| extern int _Py_uop_frame_pop(JitOptContext *ctx);
 | |
| 
 | |
| PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored);
 | |
| 
 | |
| PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **exec_ptr, int chain_depth);
 | |
| 
 | |
| static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit)
 | |
| {
 | |
|     _PyExitData *exit0 = exit - exit->index;
 | |
|     return (_PyExecutorObject *)(((char *)exit0) - offsetof(_PyExecutorObject, exits));
 | |
| }
 | |
| 
 | |
| extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void);
 | |
| 
 | |
| PyAPI_FUNC(void) _PyExecutor_ClearExit(_PyExitData *exit);
 | |
| 
 | |
| static inline int is_terminator(const _PyUOpInstruction *uop)
 | |
| {
 | |
|     int opcode = uop->opcode;
 | |
|     return (
 | |
|         opcode == _EXIT_TRACE ||
 | |
|         opcode == _JUMP_TO_TOP
 | |
|     );
 | |
| }
 | |
| 
 | |
| extern void _PyExecutor_Free(_PyExecutorObject *self);
 | |
| 
 | |
| PyAPI_FUNC(int) _PyDumpExecutors(FILE *out);
 | |
| #ifdef _Py_TIER2
 | |
| extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp);
 | |
| #endif
 | |
| 
 | |
| #ifdef __cplusplus
 | |
| }
 | |
| #endif
 | |
| #endif /* !Py_INTERNAL_OPTIMIZER_H */
 | 
