This PR is part of PEP 657 and augments the compiler to emit ending line numbers as well as starting and ending columns from the AST into compiled code objects. This allows bytecodes to be correlated to the exact source code ranges that generated them. This information is made available through the following public APIs:

* The `co_positions` method on code objects.
* The C API function `PyCode_Addr2Location`.

Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
Co-authored-by: Ammar Askar <ammar@ammaraskar.com>
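A minimal sketch of how the C-level API might be used from an extension or debugger (illustrative only; `print_instruction_location` is a hypothetical helper, and this assumes `PyCode_Addr2Location` returns nonzero on success):

#include <Python.h>
#include <stdio.h>

/* Print the source range that produced the instruction at byte offset
 * `addr` in `code`.  Columns may be reported as -1 when unavailable. */
static void
print_instruction_location(PyCodeObject *code, int addr)
{
    int start_line, start_col, end_line, end_col;
    if (PyCode_Addr2Location(code, addr,
                             &start_line, &start_col,
                             &end_line, &end_col)) {
        printf("line %d col %d  ->  line %d col %d\n",
               start_line, start_col, end_line, end_col);
    }
}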
#ifndef Py_INTERNAL_CODE_H
#define Py_INTERNAL_CODE_H
#ifdef __cplusplus
extern "C" {
#endif

/* Legacy Opcache */

typedef struct {
    PyObject *ptr;          /* Cached pointer (borrowed reference) */
    uint64_t globals_ver;   /* ma_version of global dict */
    uint64_t builtins_ver;  /* ma_version of builtin dict */
} _PyOpcache_LoadGlobal;

typedef struct {
    PyTypeObject *type;
    Py_ssize_t hint;
    unsigned int tp_version_tag;
} _PyOpCodeOpt_LoadAttr;

struct _PyOpcache {
    union {
        _PyOpcache_LoadGlobal lg;
        _PyOpCodeOpt_LoadAttr la;
    } u;
    char optimized;
};


/* PEP 659
 * Specialization and quickening structs and helper functions
 */

typedef struct {
    int32_t cache_count;
    int32_t _; /* Force 8 byte size */
} _PyEntryZero;

typedef struct {
    uint8_t original_oparg;
    uint8_t counter;
    uint16_t index;
} _PyAdaptiveEntry;


typedef struct {
    uint32_t tp_version;
    uint32_t dk_version_or_hint;
} _PyLoadAttrCache;

typedef struct {
    uint32_t module_keys_version;
    uint32_t builtin_keys_version;
} _PyLoadGlobalCache;

/* Add specialized versions of entries to this union.
 *
 * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
 * Preserving this invariant is necessary because:
 *  - If any one form uses more space, then all must, and on 64-bit machines
 *    this is likely to double the memory consumption of caches.
 *  - The function for calculating the offset of caches assumes a 4:1
 *    cache:instruction size ratio. Changing that would need careful
 *    analysis to choose a new function.
 */
typedef union {
    _PyEntryZero zero;
    _PyAdaptiveEntry adaptive;
    _PyLoadAttrCache load_attr;
    _PyLoadGlobalCache load_global;
} SpecializedCacheEntry;

#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
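/* The size invariant above can also be expressed as a compile-time check;
 * a minimal sketch, assuming a C11 compiler: */
_Static_assert(sizeof(SpecializedCacheEntry) == 8,
               "SpecializedCacheEntry must remain 8 bytes");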

/* Maximum size of code to quicken, in code units. */
#define MAX_SIZE_TO_QUICKEN 5000

typedef union _cache_or_instruction {
    _Py_CODEUNIT code[1];
    SpecializedCacheEntry entry;
} SpecializedCacheOrInstruction;

/* Get pointer to the nth cache entry, from the first instruction and n.
 * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
 * The zeroth entry immediately precedes the instructions.
 */
static inline SpecializedCacheEntry *
_GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
{
    SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
    assert(&last_cache_plus_one->code[0] == first_instr);
    return &last_cache_plus_one[-1-n].entry;
}
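/* Illustration of the layout implied above, for a code object whose
 * instructions are preceded by two cache entries (addresses increase
 * downwards):
 *
 *     _GetSpecializedCacheEntry(first_instr, 1)   <- lowest address
 *     _GetSpecializedCacheEntry(first_instr, 0)
 *     first_instr[0]                              <- code starts here
 *     first_instr[1]
 *     ...
 */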

/* The following two functions form a pair.
 *
 * oparg_from_offset_and_nexti() is used to compute the oparg
 * when quickening, so that offset_from_oparg_and_nexti()
 * can be used at runtime to compute the offset.
 *
 * The relationship between the three values is currently
 *     offset == (index>>1) + oparg
 * This relation is chosen based on the following observations:
 * 1. typically 1 in 4 instructions need a cache
 * 2. instructions that need a cache typically use 2 entries
 * These observations imply: offset ≈ index/2
 * We use the oparg to fine-tune the relation to avoid wasting space
 * and to allow consecutive instructions to use caches.
 *
 * If the number of cache entries < number of instructions/2, we waste
 * a small amount of space.
 * If the number of cache entries > (number of instructions/2) + 255, then
 * some instructions will not be able to use a cache.
 * In practice, we expect a small amount of wasted space in shorter functions,
 * and only functions of 1000 lines or more to run out of cache space.
 */
static inline int
oparg_from_offset_and_nexti(int offset, int nexti)
{
    return offset-(nexti>>1);
}

static inline int
offset_from_oparg_and_nexti(int oparg, int nexti)
{
    return (nexti>>1)+oparg;
}
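/* Worked example of the pair above: when quickening, an instruction with
 * nexti == 20 whose cache lives at offset 12 gets
 * oparg_from_offset_and_nexti(12, 20) == 12 - (20>>1) == 2; at runtime,
 * offset_from_oparg_and_nexti(2, 20) == (20>>1) + 2 == 12 recovers the
 * same offset. */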

/* Get pointer to the cache entry associated with an instruction.
 * nexti is the index of the instruction plus one.
 * nexti is used as it corresponds to the instruction pointer in the interpreter.
 * This doesn't check that an entry has been allocated for that instruction. */
static inline SpecializedCacheEntry *
_GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg)
{
    return _GetSpecializedCacheEntry(
        first_instr,
        offset_from_oparg_and_nexti(oparg, nexti)
    );
}

#define QUICKENING_WARMUP_DELAY 8

/* We want to compare to zero for efficiency, so we offset values accordingly */
#define QUICKENING_INITIAL_WARMUP_VALUE (-QUICKENING_WARMUP_DELAY)
#define QUICKENING_WARMUP_COLDEST 1

static inline void
PyCodeObject_IncrementWarmup(PyCodeObject * co)
{
    co->co_warmup++;
}

/* Used by the interpreter to determine when a code object should be quickened */
static inline int
PyCodeObject_IsWarmedUp(PyCodeObject * co)
{
    return (co->co_warmup == 0);
}

int _Py_Quicken(PyCodeObject *code);
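/* A sketch of how the interpreter might drive warmup and quickening
 * (illustrative only; assumes _Py_Quicken() returns 0 on success):
 *
 *     if (!PyCodeObject_IsWarmedUp(co)) {
 *         PyCodeObject_IncrementWarmup(co);
 *         if (PyCodeObject_IsWarmedUp(co) && _Py_Quicken(co) != 0) {
 *             goto error;
 *         }
 *     }
 */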

extern Py_ssize_t _Py_QuickenedCount;


/* "Locals plus" for a code object is the set of locals + cell vars +
 * free vars. This relates to variable names as well as offsets into
 * the "fast locals" storage array of execution frames. The compiler
 * builds the list of names, their offsets, and the corresponding
 * kind of local.
 *
 * Those kinds represent the source of the initial value and the
 * variable's scope (as related to closures). A "local" is an
 * argument or other variable defined in the current scope. A "free"
 * variable is one that is defined in an outer scope and comes from
 * the function's closure. A "cell" variable is a local that escapes
 * into an inner function as part of a closure, and thus must be
 * wrapped in a cell. Any "local" can also be a "cell", but the
 * "free" kind is mutually exclusive with both.
 */

// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_LOCAL   0x20
#define CO_FAST_CELL    0x40
#define CO_FAST_FREE    0x80

typedef unsigned char _PyLocals_Kind;
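// For example, a local that escapes into an inner function can be recorded
// as the combination (CO_FAST_LOCAL | CO_FAST_CELL), i.e. kind 0x60, while
// a variable received via the closure is CO_FAST_FREE (0x80) on its own.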

static inline _PyLocals_Kind
_PyLocals_GetKind(PyObject *kinds, int i)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    return (_PyLocals_Kind)(ptr[i]);
}

static inline void
_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)
{
    assert(PyBytes_Check(kinds));
    assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
    char *ptr = PyBytes_AS_STRING(kinds);
    ptr[i] = (char) kind;
}


struct _PyCodeConstructor {
    /* metadata */
    PyObject *filename;
    PyObject *name;
    int flags;

    /* the code */
    PyObject *code;
    int firstlineno;
    PyObject *linetable;
    PyObject *endlinetable;
    PyObject *columntable;

    /* used by the code */
    PyObject *consts;
    PyObject *names;

    /* mapping frame offsets to information */
    PyObject *localsplusnames;  // Tuple of strings
    PyObject *localspluskinds;  // Bytes object, one byte per variable

    /* args (within varnames) */
    int argcount;
    int posonlyargcount;
    // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount).
    int kwonlyargcount;

    /* needed to create the frame */
    int stacksize;

    /* used by the eval loop */
    PyObject *exceptiontable;
};

// Using an "arguments struct" like this is helpful for maintainability
// in a case such as this with many parameters. It does bear a risk:
// if the struct changes and callers are not updated properly then the
// compiler will not catch problems (like a missing argument). This can
// cause hard-to-debug problems. The risk is mitigated by the use of
// check_code() in codeobject.c. However, we may decide to switch
// back to a regular function signature. Regardless, this approach
// wouldn't be appropriate if this weren't a strictly internal API.
// (See the comments in https://github.com/python/cpython/pull/26258.)
PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *);
PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
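// A minimal usage sketch (illustrative only; assumes _PyCode_Validate()
// returns -1 on failure and that every field of the struct is filled in):
//
//     struct _PyCodeConstructor con = {
//         .filename = filename,
//         .name = name,
//         .code = bytecode,
//         /* ... all remaining fields ... */
//     };
//     if (_PyCode_Validate(&con) < 0) {
//         return NULL;
//     }
//     PyCodeObject *co = _PyCode_New(&con);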


/* Private API */

/* Getters for internal PyCodeObject data. */
PyAPI_FUNC(PyObject *) _PyCode_GetVarnames(PyCodeObject *);
PyAPI_FUNC(PyObject *) _PyCode_GetCellvars(PyCodeObject *);
PyAPI_FUNC(PyObject *) _PyCode_GetFreevars(PyCodeObject *);


/* Cache hits and misses */

static inline uint8_t
saturating_increment(uint8_t c)
{
    return c<<1;
}

static inline uint8_t
saturating_decrement(uint8_t c)
{
    return (c>>1) + 128;
}

static inline uint8_t
saturating_zero(void)
{
    return 255;
}

/* Starting value for saturating counter.
 * Technically this should be 1, but that is likely to
 * cause a bit of thrashing when we optimize then get an immediate miss.
 * We want to give the counter a chance to stabilize, so we start at 3.
 */
static inline uint8_t
saturating_start(void)
{
    return saturating_zero()<<3;
}

static inline void
record_cache_hit(_PyAdaptiveEntry *entry) {
    entry->counter = saturating_increment(entry->counter);
}

static inline void
record_cache_miss(_PyAdaptiveEntry *entry) {
    entry->counter = saturating_decrement(entry->counter);
}

static inline int
too_many_cache_misses(_PyAdaptiveEntry *entry) {
    return entry->counter == saturating_zero();
}
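/* Illustrative behaviour of the counter: saturating_start() is 248
 * (0b11111000); each miss shifts a one in from the top, so three
 * consecutive misses reach saturating_zero() (255), the state that
 * too_many_cache_misses() reports.  A hit shifts the value left,
 * moving it away from saturation. */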

#define BACKOFF 64

static inline void
cache_backoff(_PyAdaptiveEntry *entry) {
    entry->counter = BACKOFF;
}

/* Specialization functions */

int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);
int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache);

#define SPECIALIZATION_STATS 0
#define SPECIALIZATION_STATS_DETAILED 0

#if SPECIALIZATION_STATS

typedef struct _stats {
    uint64_t specialization_success;
    uint64_t specialization_failure;
    uint64_t hit;
    uint64_t deferred;
    uint64_t miss;
    uint64_t deopt;
    uint64_t unquickened;
#if SPECIALIZATION_STATS_DETAILED
    PyObject *miss_types;
#endif
} SpecializationStats;

extern SpecializationStats _specialization_stats[256];
#define STAT_INC(opname, name) _specialization_stats[opname].name++
void _Py_PrintSpecializationStats(void);
#else
#define STAT_INC(opname, name) ((void)0)
#endif
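/* A call site would typically use the macro as, e.g., STAT_INC(LOAD_ATTR, hit);
 * with stats disabled (the default above) this compiles away to nothing. */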


#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_CODE_H */