mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
Introduces LocationInfo struct sequence with end_lineno, col_offset, and end_col_offset fields. Adds opcodes parameter to RemoteUnwinder that extracts the currently executing opcode alongside its source span. Refactors linetable parsing to correctly accumulate line numbers separately from output values, fixing edge cases in computed_line.
433 lines
15 KiB
C
433 lines
15 KiB
C
/******************************************************************************
|
|
* Remote Debugging Module - Code Object Functions
|
|
*
|
|
* This file contains functions for parsing code objects and line tables
|
|
* from remote process memory.
|
|
******************************************************************************/
|
|
|
|
#include "_remote_debugging.h"
|
|
|
|
/* ============================================================================
|
|
* TLBC CACHING FUNCTIONS (Py_GIL_DISABLED only)
|
|
* ============================================================================ */
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
|
|
void
|
|
tlbc_cache_entry_destroy(void *ptr)
|
|
{
|
|
TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr;
|
|
if (entry->tlbc_array) {
|
|
PyMem_RawFree(entry->tlbc_array);
|
|
}
|
|
PyMem_RawFree(entry);
|
|
}
|
|
|
|
TLBCCacheEntry *
|
|
get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation)
|
|
{
|
|
void *key = (void *)code_addr;
|
|
TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key);
|
|
|
|
if (entry && entry->generation != current_generation) {
|
|
// Entry is stale, remove it by setting to NULL
|
|
_Py_hashtable_set(self->tlbc_cache, key, NULL);
|
|
entry = NULL;
|
|
}
|
|
|
|
return entry;
|
|
}
|
|
|
|
int
|
|
cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation)
|
|
{
|
|
uintptr_t tlbc_array_ptr;
|
|
void *tlbc_array = NULL;
|
|
TLBCCacheEntry *entry = NULL;
|
|
|
|
// Read the TLBC array pointer
|
|
if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) {
|
|
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer");
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer");
|
|
return 0; // Read error
|
|
}
|
|
|
|
// Validate TLBC array pointer
|
|
if (tlbc_array_ptr == 0) {
|
|
PyErr_SetString(PyExc_RuntimeError, "TLBC array pointer is NULL");
|
|
return 0; // No TLBC array
|
|
}
|
|
|
|
// Read the TLBC array size
|
|
Py_ssize_t tlbc_size;
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) {
|
|
PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size");
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size");
|
|
return 0; // Read error
|
|
}
|
|
|
|
// Validate TLBC array size
|
|
if (tlbc_size <= 0) {
|
|
PyErr_SetString(PyExc_RuntimeError, "Invalid TLBC array size");
|
|
return 0; // Invalid size
|
|
}
|
|
|
|
if (tlbc_size > MAX_TLBC_SIZE) {
|
|
PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit");
|
|
return 0; // Invalid size
|
|
}
|
|
|
|
// Allocate and read the entire TLBC array
|
|
size_t array_data_size = tlbc_size * sizeof(void*);
|
|
tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size);
|
|
if (!tlbc_array) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array");
|
|
return 0; // Memory error
|
|
}
|
|
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) {
|
|
PyMem_RawFree(tlbc_array);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data");
|
|
return 0; // Read error
|
|
}
|
|
|
|
// Create cache entry
|
|
entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry));
|
|
if (!entry) {
|
|
PyErr_NoMemory();
|
|
PyMem_RawFree(tlbc_array);
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry");
|
|
return 0; // Memory error
|
|
}
|
|
|
|
entry->tlbc_array = tlbc_array;
|
|
entry->tlbc_array_size = tlbc_size;
|
|
entry->generation = generation;
|
|
|
|
// Store in cache
|
|
void *key = (void *)code_addr;
|
|
if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) {
|
|
tlbc_cache_entry_destroy(entry);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in cache");
|
|
return 0; // Cache error
|
|
}
|
|
|
|
return 1; // Success
|
|
}
|
|
|
|
#endif
|
|
|
|
/* ============================================================================
|
|
* LINE TABLE PARSING FUNCTIONS
|
|
* ============================================================================ */
|
|
|
|
static int
|
|
scan_varint(const uint8_t **ptr)
|
|
{
|
|
unsigned int read = **ptr;
|
|
*ptr = *ptr + 1;
|
|
unsigned int val = read & 63;
|
|
unsigned int shift = 0;
|
|
while (read & 64) {
|
|
read = **ptr;
|
|
*ptr = *ptr + 1;
|
|
shift += 6;
|
|
val |= (read & 63) << shift;
|
|
}
|
|
return val;
|
|
}
|
|
|
|
static int
|
|
scan_signed_varint(const uint8_t **ptr)
|
|
{
|
|
unsigned int uval = scan_varint(ptr);
|
|
if (uval & 1) {
|
|
return -(int)(uval >> 1);
|
|
}
|
|
else {
|
|
return uval >> 1;
|
|
}
|
|
}
|
|
|
|
bool
|
|
parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info)
|
|
{
|
|
const uint8_t* ptr = (const uint8_t*)(linetable);
|
|
uintptr_t addr = 0;
|
|
int computed_line = firstlineno; // Running accumulator, separate from output
|
|
|
|
while (*ptr != '\0') {
|
|
uint8_t first_byte = *(ptr++);
|
|
uint8_t code = (first_byte >> 3) & 15;
|
|
size_t length = (first_byte & 7) + 1;
|
|
uintptr_t end_addr = addr + length;
|
|
|
|
switch (code) {
|
|
case PY_CODE_LOCATION_INFO_NONE:
|
|
info->lineno = info->end_lineno = -1;
|
|
info->column = info->end_column = -1;
|
|
break;
|
|
case PY_CODE_LOCATION_INFO_LONG:
|
|
computed_line += scan_signed_varint(&ptr);
|
|
info->lineno = computed_line;
|
|
info->end_lineno = computed_line + scan_varint(&ptr);
|
|
info->column = scan_varint(&ptr) - 1;
|
|
info->end_column = scan_varint(&ptr) - 1;
|
|
break;
|
|
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
|
|
computed_line += scan_signed_varint(&ptr);
|
|
info->lineno = info->end_lineno = computed_line;
|
|
info->column = info->end_column = -1;
|
|
break;
|
|
case PY_CODE_LOCATION_INFO_ONE_LINE0:
|
|
case PY_CODE_LOCATION_INFO_ONE_LINE1:
|
|
case PY_CODE_LOCATION_INFO_ONE_LINE2:
|
|
computed_line += code - 10;
|
|
info->lineno = info->end_lineno = computed_line;
|
|
info->column = *(ptr++);
|
|
info->end_column = *(ptr++);
|
|
break;
|
|
default: {
|
|
uint8_t second_byte = *(ptr++);
|
|
if ((second_byte & 128) != 0) {
|
|
return false;
|
|
}
|
|
info->lineno = info->end_lineno = computed_line;
|
|
info->column = code << 3 | (second_byte >> 4);
|
|
info->end_column = info->column + (second_byte & 15);
|
|
break;
|
|
}
|
|
}
|
|
if (addr <= addrq && end_addr > addrq) {
|
|
return true;
|
|
}
|
|
addr = end_addr;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* ============================================================================
|
|
* CODE OBJECT AND FRAME INFO FUNCTIONS
|
|
* ============================================================================ */
|
|
|
|
PyObject *
|
|
make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno,
|
|
int col_offset, int end_col_offset)
|
|
{
|
|
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
|
|
PyObject *info = PyStructSequence_New(state->LocationInfo_Type);
|
|
if (info == NULL) {
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo");
|
|
return NULL;
|
|
}
|
|
PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno));
|
|
PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno));
|
|
PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset));
|
|
PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset));
|
|
return info;
|
|
}
|
|
|
|
PyObject *
|
|
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location,
|
|
PyObject *func, PyObject *opcode)
|
|
{
|
|
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
|
|
PyObject *info = PyStructSequence_New(state->FrameInfo_Type);
|
|
if (info == NULL) {
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo");
|
|
return NULL;
|
|
}
|
|
Py_INCREF(file);
|
|
Py_INCREF(location);
|
|
Py_INCREF(func);
|
|
Py_INCREF(opcode);
|
|
PyStructSequence_SetItem(info, 0, file);
|
|
PyStructSequence_SetItem(info, 1, location);
|
|
PyStructSequence_SetItem(info, 2, func);
|
|
PyStructSequence_SetItem(info, 3, opcode);
|
|
return info;
|
|
}
|
|
|
|
int
|
|
parse_code_object(RemoteUnwinderObject *unwinder,
|
|
PyObject **result,
|
|
uintptr_t address,
|
|
uintptr_t instruction_pointer,
|
|
uintptr_t *previous_frame,
|
|
int32_t tlbc_index)
|
|
{
|
|
void *key = (void *)address;
|
|
CachedCodeMetadata *meta = NULL;
|
|
PyObject *func = NULL;
|
|
PyObject *file = NULL;
|
|
PyObject *linetable = NULL;
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
// In free threading builds, code object addresses might have the low bit set
|
|
// as a flag, so we need to mask it off to get the real address
|
|
uintptr_t real_address = address & (~1);
|
|
#else
|
|
uintptr_t real_address = address;
|
|
#endif
|
|
|
|
if (unwinder && unwinder->code_object_cache != NULL) {
|
|
meta = _Py_hashtable_get(unwinder->code_object_cache, key);
|
|
}
|
|
|
|
if (meta == NULL) {
|
|
char code_object[SIZEOF_CODE_OBJ];
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0)
|
|
{
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object");
|
|
goto error;
|
|
}
|
|
|
|
func = read_py_str(unwinder,
|
|
GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024);
|
|
if (!func) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object");
|
|
goto error;
|
|
}
|
|
|
|
file = read_py_str(unwinder,
|
|
GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024);
|
|
if (!file) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object");
|
|
goto error;
|
|
}
|
|
|
|
linetable = read_py_bytes(unwinder,
|
|
GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096);
|
|
if (!linetable) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object");
|
|
goto error;
|
|
}
|
|
|
|
meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata));
|
|
if (!meta) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata");
|
|
goto error;
|
|
}
|
|
|
|
meta->func_name = func;
|
|
meta->file_name = file;
|
|
meta->linetable = linetable;
|
|
meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno);
|
|
meta->addr_code_adaptive = real_address + (uintptr_t)unwinder->debug_offsets.code_object.co_code_adaptive;
|
|
|
|
if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) {
|
|
cached_code_metadata_destroy(meta);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata");
|
|
goto error;
|
|
}
|
|
|
|
// Ownership transferred to meta
|
|
func = NULL;
|
|
file = NULL;
|
|
linetable = NULL;
|
|
}
|
|
|
|
uintptr_t ip = instruction_pointer;
|
|
ptrdiff_t addrq;
|
|
|
|
#ifdef Py_GIL_DISABLED
|
|
// Handle thread-local bytecode (TLBC) in free threading builds
|
|
if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) {
|
|
// No TLBC or no unwinder - use main bytecode directly
|
|
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
|
|
goto done_tlbc;
|
|
}
|
|
|
|
// Try to get TLBC data from cache (we'll get generation from the caller)
|
|
TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
|
|
|
|
if (!tlbc_entry) {
|
|
// Cache miss - try to read and cache TLBC array
|
|
if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array");
|
|
goto error;
|
|
}
|
|
tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation);
|
|
}
|
|
|
|
if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) {
|
|
// Use cached TLBC data
|
|
uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t));
|
|
uintptr_t tlbc_bytecode_addr = entries[tlbc_index];
|
|
|
|
if (tlbc_bytecode_addr != 0) {
|
|
// Calculate offset from TLBC bytecode
|
|
addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr;
|
|
goto done_tlbc;
|
|
}
|
|
}
|
|
|
|
// Fall back to main bytecode
|
|
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
|
|
|
|
done_tlbc:
|
|
#else
|
|
// Non-free-threaded build, always use the main bytecode
|
|
(void)tlbc_index; // Suppress unused parameter warning
|
|
(void)unwinder; // Suppress unused parameter warning
|
|
addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive;
|
|
#endif
|
|
; // Empty statement to avoid C23 extension warning
|
|
LocationInfo info = {0};
|
|
bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable),
|
|
meta->first_lineno, &info);
|
|
if (!ok) {
|
|
info.lineno = -1;
|
|
info.end_lineno = -1;
|
|
info.column = -1;
|
|
info.end_column = -1;
|
|
}
|
|
|
|
// Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset)
|
|
PyObject *location = make_location_info(unwinder,
|
|
info.lineno,
|
|
info.end_lineno,
|
|
info.column,
|
|
info.end_column);
|
|
if (!location) {
|
|
goto error;
|
|
}
|
|
|
|
// Read the instruction opcode from target process if opcodes flag is set
|
|
PyObject *opcode_obj = NULL;
|
|
if (unwinder->opcodes) {
|
|
uint16_t instruction_word = 0;
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip,
|
|
sizeof(uint16_t), &instruction_word) == 0) {
|
|
opcode_obj = PyLong_FromLong(instruction_word & 0xFF);
|
|
if (!opcode_obj) {
|
|
Py_DECREF(location);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object");
|
|
goto error;
|
|
}
|
|
} else {
|
|
// Opcode read failed - clear the exception since opcode is optional
|
|
PyErr_Clear();
|
|
}
|
|
}
|
|
|
|
PyObject *tuple = make_frame_info(unwinder, meta->file_name, location,
|
|
meta->func_name, opcode_obj ? opcode_obj : Py_None);
|
|
Py_DECREF(location);
|
|
Py_XDECREF(opcode_obj);
|
|
if (!tuple) {
|
|
goto error;
|
|
}
|
|
|
|
*result = tuple;
|
|
return 0;
|
|
|
|
error:
|
|
Py_XDECREF(func);
|
|
Py_XDECREF(file);
|
|
Py_XDECREF(linetable);
|
|
return -1;
|
|
}
|