cpython/Modules/_remote_debugging/code_objects.c
Pablo Galindo Salgado 661df25692
gh-149584: Fix excessive overhead in the Tachyon profiler regarding the cache behavior (#149649)
Use exact remote reads for interpreter state, thread state, and
interpreter frame structs instead of pulling full remote pages into the
profiler page cache. This matches the core change from
python/cpython#149585.

The profiler clears the page cache between samples, so live entries are
always packed at the front. Track the live count and only clear/search
that prefix instead of scanning all 1024 slots on the hot path.

Use the frame cache to predict the next thread state and top frame
address, then batch interpreter/thread/frame reads with process_vm_readv
when profiling a Linux target. Reuse prefetched frame buffers in the
frame walker when the prediction is valid.

Cache the last FrameInfo tuple per code object/instruction offset, reuse
cached thread id objects, and append cached parent frames directly on
full frame-cache hits. This cuts Python allocation churn in the
steady-state profiler path.
2026-05-20 04:32:08 -07:00

553 lines
19 KiB
C

/******************************************************************************
* Remote Debugging Module - Code Object Functions
*
* This file contains functions for parsing code objects and line tables
* from remote process memory.
******************************************************************************/
#include "_remote_debugging.h"
/* ============================================================================
* TLBC CACHING FUNCTIONS (Py_GIL_DISABLED only)
* ============================================================================ */
#ifdef Py_GIL_DISABLED
void
tlbc_cache_entry_destroy(void *ptr)
{
TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr;
if (entry->tlbc_array) {
PyMem_RawFree(entry->tlbc_array);
}
PyMem_RawFree(entry);
}
TLBCCacheEntry *
get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation)
{
void *key = (void *)code_addr;
TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key);
if (entry && entry->generation != current_generation) {
// Entry is stale, remove it by setting to NULL
_Py_hashtable_set(self->tlbc_cache, key, NULL);
entry = NULL;
}
return entry;
}
int
cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation)
{
uintptr_t tlbc_array_ptr;
void *tlbc_array = NULL;
TLBCCacheEntry *entry = NULL;
// Read the TLBC array pointer
if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) {
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer");
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer");
return 0; // Read error
}
// Validate TLBC array pointer
if (tlbc_array_ptr == 0) {
PyErr_SetString(PyExc_RuntimeError, "TLBC array pointer is NULL");
return 0; // No TLBC array
}
// Read the TLBC array size
Py_ssize_t tlbc_size;
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) {
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size");
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size");
return 0; // Read error
}
// Validate TLBC array size
if (tlbc_size <= 0) {
PyErr_SetString(PyExc_RuntimeError, "Invalid TLBC array size");
return 0; // Invalid size
}
if (tlbc_size > MAX_TLBC_SIZE) {
PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit");
return 0; // Invalid size
}
assert(tlbc_size > 0 && tlbc_size <= MAX_TLBC_SIZE);
// Allocate and read the entire TLBC array
size_t array_data_size = tlbc_size * sizeof(void*);
tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size);
if (!tlbc_array) {
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array");
return 0; // Memory error
}
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) {
PyMem_RawFree(tlbc_array);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data");
return 0; // Read error
}
// Create cache entry
entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry));
if (!entry) {
PyErr_NoMemory();
PyMem_RawFree(tlbc_array);
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry");
return 0; // Memory error
}
entry->tlbc_array = tlbc_array;
entry->tlbc_array_size = tlbc_size;
entry->generation = generation;
// Store in cache
void *key = (void *)code_addr;
if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) {
tlbc_cache_entry_destroy(entry);
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in cache");
return 0; // Cache error
}
return 1; // Success
}
#endif
/* ============================================================================
* LINE TABLE PARSING FUNCTIONS
* ============================================================================ */
// Inline helper for bounds-checked byte reading (no function call overhead)
static inline int
read_byte(const uint8_t **ptr, const uint8_t *end, uint8_t *out)
{
if (*ptr >= end) {
return -1;
}
*out = *(*ptr)++;
return 0;
}
static int
scan_varint(const uint8_t **ptr, const uint8_t *end)
{
uint8_t read;
if (read_byte(ptr, end, &read) < 0) {
return -1;
}
unsigned int val = read & 63;
unsigned int shift = 0;
while (read & 64) {
if (read_byte(ptr, end, &read) < 0) {
return -1;
}
shift += 6;
// Prevent infinite loop on malformed data (shift overflow)
if (shift > 28) {
return -1;
}
val |= (read & 63) << shift;
}
return (int)val;
}
static int
scan_signed_varint(const uint8_t **ptr, const uint8_t *end)
{
int uval = scan_varint(ptr, end);
if (uval < 0) {
return INT_MIN; // Error sentinel (valid signed varints won't be INT_MIN)
}
if (uval & 1) {
return -(int)((unsigned int)uval >> 1);
}
else {
return (int)((unsigned int)uval >> 1);
}
}
bool
parse_linetable(const uintptr_t addrq, const char* linetable, Py_ssize_t linetable_size,
int firstlineno, LocationInfo* info)
{
// Reject garbage: zero or negative size
if (linetable_size <= 0) {
return false;
}
const uint8_t* ptr = (const uint8_t*)(linetable);
const uint8_t* end = ptr + linetable_size;
uintptr_t addr = 0;
int computed_line = firstlineno; // Running accumulator, separate from output
int val; // Temporary for varint results
uint8_t byte; // Temporary for byte reads
const size_t MAX_LINETABLE_ENTRIES = 65536;
size_t entry_count = 0;
while (ptr < end && *ptr != '\0' && entry_count < MAX_LINETABLE_ENTRIES) {
entry_count++;
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
size_t length = (first_byte & 7) + 1;
uintptr_t end_addr = addr + length;
switch (code) {
case PY_CODE_LOCATION_INFO_NONE:
info->lineno = info->end_lineno = -1;
info->column = info->end_column = -1;
break;
case PY_CODE_LOCATION_INFO_LONG:
val = scan_signed_varint(&ptr, end);
if (val == INT_MIN) {
return false;
}
computed_line += val;
info->lineno = computed_line;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->end_lineno = computed_line + val;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->column = val - 1;
val = scan_varint(&ptr, end);
if (val < 0) {
return false;
}
info->end_column = val - 1;
break;
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
val = scan_signed_varint(&ptr, end);
if (val == INT_MIN) {
return false;
}
computed_line += val;
info->lineno = info->end_lineno = computed_line;
info->column = info->end_column = -1;
break;
case PY_CODE_LOCATION_INFO_ONE_LINE0:
case PY_CODE_LOCATION_INFO_ONE_LINE1:
case PY_CODE_LOCATION_INFO_ONE_LINE2:
computed_line += code - 10;
info->lineno = info->end_lineno = computed_line;
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
info->column = byte;
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
info->end_column = byte;
break;
default: {
if (read_byte(&ptr, end, &byte) < 0) {
return false;
}
if ((byte & 128) != 0) {
return false;
}
info->lineno = info->end_lineno = computed_line;
info->column = code << 3 | (byte >> 4);
info->end_column = info->column + (byte & 15);
break;
}
}
if (addr <= addrq && end_addr > addrq) {
return true;
}
addr = end_addr;
}
return false;
}
/* ============================================================================
* CODE OBJECT AND FRAME INFO FUNCTIONS
* ============================================================================ */
PyObject *
make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno,
int col_offset, int end_col_offset)
{
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->LocationInfo_Type);
if (info == NULL) {
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo");
return NULL;
}
PyObject *py_lineno = PyLong_FromLong(lineno);
if (py_lineno == NULL) {
Py_DECREF(info);
return NULL;
}
PyStructSequence_SetItem(info, 0, py_lineno); // steals reference
PyObject *py_end_lineno = PyLong_FromLong(end_lineno);
if (py_end_lineno == NULL) {
Py_DECREF(info);
return NULL;
}
PyStructSequence_SetItem(info, 1, py_end_lineno); // steals reference
PyObject *py_col_offset = PyLong_FromLong(col_offset);
if (py_col_offset == NULL) {
Py_DECREF(info);
return NULL;
}
PyStructSequence_SetItem(info, 2, py_col_offset); // steals reference
PyObject *py_end_col_offset = PyLong_FromLong(end_col_offset);
if (py_end_col_offset == NULL) {
Py_DECREF(info);
return NULL;
}
PyStructSequence_SetItem(info, 3, py_end_col_offset); // steals reference
return info;
}
PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location,
PyObject *func, PyObject *opcode)
{
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->FrameInfo_Type);
if (info == NULL) {
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo");
return NULL;
}
Py_INCREF(file);
Py_INCREF(location);
Py_INCREF(func);
Py_INCREF(opcode);
PyStructSequence_SetItem(info, 0, file);
PyStructSequence_SetItem(info, 1, location);
PyStructSequence_SetItem(info, 2, func);
PyStructSequence_SetItem(info, 3, opcode);
return info;
}
int
parse_code_object(RemoteUnwinderObject *unwinder,
PyObject **result,
const CodeObjectContext *ctx)
{
void *key = (void *)ctx->code_addr;
CachedCodeMetadata *meta = NULL;
PyObject *func = NULL;
PyObject *file = NULL;
PyObject *linetable = NULL;
#ifdef Py_GIL_DISABLED
// In free threading builds, code object addresses might have the low bit set
// as a flag, so we need to mask it off to get the real address
uintptr_t real_address = ctx->code_addr & (~1);
#else
uintptr_t real_address = ctx->code_addr;
#endif
if (unwinder && unwinder->code_object_cache != NULL) {
meta = _Py_hashtable_get(unwinder->code_object_cache, key);
if (meta) {
STATS_INC(unwinder, code_object_cache_hits);
} else {
STATS_INC(unwinder, code_object_cache_misses);
}
}
if (meta == NULL) {
char code_object[SIZEOF_CODE_OBJ];
if (_Py_RemoteDebug_PagedReadRemoteMemory(
&unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0)
{
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object");
goto error;
}
func = read_py_str(unwinder,
GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024);
if (!func) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object");
goto error;
}
file = read_py_str(unwinder,
GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024);
if (!file) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object");
goto error;
}
linetable = read_py_bytes(unwinder,
GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096);
if (!linetable) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object");
goto error;
}
meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata));
if (!meta) {
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata");
goto error;
}
meta->func_name = func;
meta->file_name = file;
meta->linetable = linetable;
meta->last_frame_info = NULL;
meta->last_addrq = -1;
meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno);
meta->addr_code_adaptive = real_address + (uintptr_t)unwinder->debug_offsets.code_object.co_code_adaptive;
if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) {
// Ownership of func/file/linetable was transferred to meta,
// so NULL them before destroying meta to prevent double-free
// in the error label's Py_XDECREF calls.
func = NULL;
file = NULL;
linetable = NULL;
cached_code_metadata_destroy(meta);
PyErr_NoMemory();
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata");
goto error;
}
// Ownership transferred to meta
func = NULL;
file = NULL;
linetable = NULL;
}
uintptr_t ip = ctx->instruction_pointer;
ptrdiff_t addrq;
#ifdef Py_GIL_DISABLED
// Handle thread-local bytecode (TLBC) in free threading builds
if (ctx->tlbc_index == 0 || unwinder == NULL || unwinder->debug_offsets.code_object.co_tlbc == 0) {
// No TLBC or no unwinder - use main bytecode directly
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
goto done_tlbc;
}
// Try to get TLBC data from cache (we'll get generation from the caller)
TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
if (!tlbc_entry) {
// Cache miss - try to read and cache TLBC array
if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array");
goto error;
}
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
}
// Validate tlbc_index and check TLBC cache
if (tlbc_entry) {
// Validate index bounds (also catches negative values since tlbc_index is signed)
if (ctx->tlbc_index < 0 || ctx->tlbc_index >= tlbc_entry->tlbc_array_size) {
PyErr_Format(PyExc_RuntimeError,
"Invalid tlbc_index %d (array size %zd, corrupted remote memory)",
ctx->tlbc_index, tlbc_entry->tlbc_array_size);
set_exception_cause(unwinder, PyExc_RuntimeError,
"Invalid tlbc_index (corrupted remote memory)");
goto error;
}
assert(tlbc_entry->tlbc_array_size > 0);
// Use cached TLBC data
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
uintptr_t tlbc_bytecode_addr = entries[ctx->tlbc_index];
if (tlbc_bytecode_addr != 0) {
// Calculate offset from TLBC bytecode
addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr;
goto done_tlbc;
}
}
// Fall back to main bytecode (no tlbc_entry or tlbc_bytecode_addr was 0)
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
done_tlbc:
#else
// Non-free-threaded build, always use the main bytecode
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
#endif
; // Empty statement to avoid C23 extension warning
if (!unwinder->opcodes && meta->last_frame_info != NULL && meta->last_addrq == addrq) {
*result = Py_NewRef(meta->last_frame_info);
return 0;
}
LocationInfo info = {0};
bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
PyBytes_GET_SIZE(meta->linetable),
meta->first_lineno, &info);
if (!ok) {
info.lineno = -1;
info.end_lineno = -1;
info.column = -1;
info.end_column = -1;
}
// Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset)
PyObject *location = make_location_info(unwinder,
info.lineno,
info.end_lineno,
info.column,
info.end_column);
if (!location) {
goto error;
}
// Read the instruction opcode from target process if opcodes flag is set
PyObject *opcode_obj = NULL;
if (unwinder->opcodes) {
uint16_t instruction_word = 0;
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip,
sizeof(uint16_t), &instruction_word) == 0) {
opcode_obj = PyLong_FromLong(instruction_word & 0xFF);
if (!opcode_obj) {
Py_DECREF(location);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object");
goto error;
}
} else {
// Opcode read failed - clear the exception since opcode is optional
PyErr_Clear();
}
}
PyObject *tuple = make_frame_info(unwinder, meta->file_name, location,
meta->func_name, opcode_obj ? opcode_obj : Py_None);
Py_DECREF(location);
Py_XDECREF(opcode_obj);
if (!tuple) {
goto error;
}
if (!unwinder->opcodes) {
Py_XSETREF(meta->last_frame_info, Py_NewRef(tuple));
meta->last_addrq = addrq;
}
*result = tuple;
return 0;
error:
Py_XDECREF(func);
Py_XDECREF(file);
Py_XDECREF(linetable);
return -1;
}