Extend RemoteUnwinder to capture precise bytecode locations

Introduces LocationInfo struct sequence with end_lineno, col_offset, and
end_col_offset fields. Adds opcodes parameter to RemoteUnwinder that
extracts the currently executing opcode alongside its source span.

Refactors linetable parsing to correctly accumulate line numbers
separately from output values, fixing edge cases in computed_line.
This commit is contained in:
Pablo Galindo Salgado 2025-12-03 03:43:00 +00:00
parent 8801c6dec7
commit dd27e5e679
5 changed files with 146 additions and 46 deletions

View file

@ -157,6 +157,7 @@ typedef struct {
typedef struct {
PyTypeObject *RemoteDebugging_Type;
PyTypeObject *TaskInfo_Type;
PyTypeObject *LocationInfo_Type;
PyTypeObject *FrameInfo_Type;
PyTypeObject *CoroInfo_Type;
PyTypeObject *ThreadInfo_Type;
@ -195,6 +196,7 @@ typedef struct {
int skip_non_matching_threads;
int native;
int gc;
int opcodes;
RemoteDebuggingState *cached_state;
#ifdef Py_GIL_DISABLED
uint32_t tlbc_generation;
@ -248,6 +250,7 @@ typedef int (*set_entry_processor_func)(
* ============================================================================ */
extern PyStructSequence_Desc TaskInfo_desc;
extern PyStructSequence_Desc LocationInfo_desc;
extern PyStructSequence_Desc FrameInfo_desc;
extern PyStructSequence_Desc CoroInfo_desc;
extern PyStructSequence_Desc ThreadInfo_desc;
@ -298,11 +301,20 @@ extern int parse_code_object(
int32_t tlbc_index
);
extern PyObject *make_location_info(
RemoteUnwinderObject *unwinder,
int lineno,
int end_lineno,
int col_offset,
int end_col_offset
);
extern PyObject *make_frame_info(
RemoteUnwinderObject *unwinder,
PyObject *file,
PyObject *line,
PyObject *func
PyObject *location, // LocationInfo structseq or None for synthetic frames
PyObject *func,
PyObject *opcode
);
/* Line table parsing */

View file

@ -12,7 +12,7 @@ preserve
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n"
" mode=0, debug=False, skip_non_matching_threads=True,\n"
" native=False, gc=False)\n"
" native=False, gc=False, opcodes=False)\n"
"--\n"
"\n"
"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n"
@ -32,6 +32,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
" non-Python code.\n"
" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n"
" collection.\n"
" opcodes: If True, gather bytecode opcode information for instruction-level\n"
" profiling.\n"
"\n"
"The RemoteUnwinder provides functionality to inspect and debug a running Python\n"
"process, including examining thread states, stack frames and other runtime data.\n"
@ -48,7 +50,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int only_active_thread,
int mode, int debug,
int skip_non_matching_threads,
int native, int gc);
int native, int gc,
int opcodes);
static int
_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs)
@ -56,7 +59,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int return_value = -1;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 8
#define NUM_KEYWORDS 9
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
@ -65,7 +68,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), },
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(opcodes), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
@ -74,14 +77,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL};
static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "opcodes", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "RemoteUnwinder",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[8];
PyObject *argsbuf[9];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
@ -93,6 +96,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int skip_non_matching_threads = 1;
int native = 0;
int gc = 0;
int opcodes = 0;
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
@ -160,12 +164,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
goto skip_optional_kwonly;
}
}
gc = PyObject_IsTrue(fastargs[7]);
if (gc < 0) {
if (fastargs[7]) {
gc = PyObject_IsTrue(fastargs[7]);
if (gc < 0) {
goto exit;
}
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
opcodes = PyObject_IsTrue(fastargs[8]);
if (opcodes < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc);
return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, opcodes);
exit:
return return_value;
@ -347,4 +360,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject
return return_value;
}
/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/
/*[clinic end generated code: output=946a0838197bf141 input=a9049054013a1b77]*/

View file

@ -155,48 +155,45 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
{
const uint8_t* ptr = (const uint8_t*)(linetable);
uintptr_t addr = 0;
info->lineno = firstlineno;
int computed_line = firstlineno; // Running accumulator, separate from output
while (*ptr != '\0') {
// See InternalDocs/code_objects.md for where these magic numbers are from
// and for the decoding algorithm.
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
size_t length = (first_byte & 7) + 1;
uintptr_t end_addr = addr + length;
switch (code) {
case PY_CODE_LOCATION_INFO_NONE: {
case PY_CODE_LOCATION_INFO_NONE:
info->lineno = info->end_lineno = -1;
info->column = info->end_column = -1;
break;
}
case PY_CODE_LOCATION_INFO_LONG: {
int line_delta = scan_signed_varint(&ptr);
info->lineno += line_delta;
info->end_lineno = info->lineno + scan_varint(&ptr);
case PY_CODE_LOCATION_INFO_LONG:
computed_line += scan_signed_varint(&ptr);
info->lineno = computed_line;
info->end_lineno = computed_line + scan_varint(&ptr);
info->column = scan_varint(&ptr) - 1;
info->end_column = scan_varint(&ptr) - 1;
break;
}
case PY_CODE_LOCATION_INFO_NO_COLUMNS: {
int line_delta = scan_signed_varint(&ptr);
info->lineno += line_delta;
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
computed_line += scan_signed_varint(&ptr);
info->lineno = info->end_lineno = computed_line;
info->column = info->end_column = -1;
break;
}
case PY_CODE_LOCATION_INFO_ONE_LINE0:
case PY_CODE_LOCATION_INFO_ONE_LINE1:
case PY_CODE_LOCATION_INFO_ONE_LINE2: {
int line_delta = code - 10;
info->lineno += line_delta;
info->end_lineno = info->lineno;
case PY_CODE_LOCATION_INFO_ONE_LINE2:
computed_line += code - 10;
info->lineno = info->end_lineno = computed_line;
info->column = *(ptr++);
info->end_column = *(ptr++);
break;
}
default: {
uint8_t second_byte = *(ptr++);
if ((second_byte & 128) != 0) {
return false;
}
info->lineno = info->end_lineno = computed_line;
info->column = code << 3 | (second_byte >> 4);
info->end_column = info->column + (second_byte & 15);
break;
@ -215,8 +212,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
* ============================================================================ */
PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
PyObject *func)
make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno,
int col_offset, int end_col_offset)
{
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->LocationInfo_Type);
if (info == NULL) {
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo");
return NULL;
}
PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno));
PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno));
PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset));
PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset));
return info;
}
PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location,
PyObject *func, PyObject *opcode)
{
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->FrameInfo_Type);
@ -225,11 +239,13 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
return NULL;
}
Py_INCREF(file);
Py_INCREF(line);
Py_INCREF(location);
Py_INCREF(func);
Py_INCREF(opcode);
PyStructSequence_SetItem(info, 0, file);
PyStructSequence_SetItem(info, 1, line);
PyStructSequence_SetItem(info, 1, location);
PyStructSequence_SetItem(info, 2, func);
PyStructSequence_SetItem(info, 3, opcode);
return info;
}
@ -365,16 +381,43 @@ parse_code_object(RemoteUnwinderObject *unwinder,
meta->first_lineno, &info);
if (!ok) {
info.lineno = -1;
info.end_lineno = -1;
info.column = -1;
info.end_column = -1;
}
PyObject *lineno = PyLong_FromLong(info.lineno);
if (!lineno) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object");
// Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset)
PyObject *location = make_location_info(unwinder,
info.lineno,
info.end_lineno,
info.column,
info.end_column);
if (!location) {
goto error;
}
PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name);
Py_DECREF(lineno);
// Read the instruction opcode from target process if opcodes flag is set
PyObject *opcode_obj = NULL;
if (unwinder->opcodes) {
uint16_t instruction_word = 0;
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip,
sizeof(uint16_t), &instruction_word) == 0) {
opcode_obj = PyLong_FromLong(instruction_word & 0xFF);
if (!opcode_obj) {
Py_DECREF(location);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object");
goto error;
}
} else {
// Opcode read failed - clear the exception since opcode is optional
PyErr_Clear();
}
}
PyObject *tuple = make_frame_info(unwinder, meta->file_name, location,
meta->func_name, opcode_obj ? opcode_obj : Py_None);
Py_DECREF(location);
Py_XDECREF(opcode_obj);
if (!tuple) {
goto error;
}

View file

@ -310,9 +310,9 @@ process_frame_chain(
extra_frame = &_Py_STR(native);
}
if (extra_frame) {
// Use "~" as file and 0 as line, since that's what pstats uses:
// Use "~" as file, None as location (synthetic frame), None as opcode
PyObject *extra_frame_info = make_frame_info(
unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame);
unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None);
if (extra_frame_info == NULL) {
return -1;
}

View file

@ -28,11 +28,28 @@ PyStructSequence_Desc TaskInfo_desc = {
4
};
// LocationInfo structseq type
static PyStructSequence_Field LocationInfo_fields[] = {
{"lineno", "Line number"},
{"end_lineno", "End line number"},
{"col_offset", "Column offset"},
{"end_col_offset", "End column offset"},
{NULL}
};
PyStructSequence_Desc LocationInfo_desc = {
"_remote_debugging.LocationInfo",
"Source location information: (lineno, end_lineno, col_offset, end_col_offset)",
LocationInfo_fields,
4
};
// FrameInfo structseq type
static PyStructSequence_Field FrameInfo_fields[] = {
{"filename", "Source code filename"},
{"lineno", "Line number"},
{"location", "LocationInfo structseq or None for synthetic frames"},
{"funcname", "Function name"},
{"opcode", "Opcode being executed (None if not gathered)"},
{NULL}
};
@ -40,7 +57,7 @@ PyStructSequence_Desc FrameInfo_desc = {
"_remote_debugging.FrameInfo",
"Information about a frame",
FrameInfo_fields,
3
4
};
// CoroInfo structseq type
@ -235,6 +252,7 @@ _remote_debugging.RemoteUnwinder.__init__
skip_non_matching_threads: bool = True
native: bool = False
gc: bool = False
opcodes: bool = False
Initialize a new RemoteUnwinder object for debugging a remote Python process.
@ -253,6 +271,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
non-Python code.
gc: If True, include artificial "<GC>" frames to denote active garbage
collection.
opcodes: If True, gather bytecode opcode information for instruction-level
profiling.
The RemoteUnwinder provides functionality to inspect and debug a running Python
process, including examining thread states, stack frames and other runtime data.
@ -270,8 +290,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int only_active_thread,
int mode, int debug,
int skip_non_matching_threads,
int native, int gc)
/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/
int native, int gc,
int opcodes)
/*[clinic end generated code: output=e7f77865c7dd662f input=3dba9e3da913a1e0]*/
{
// Validate that all_threads and only_active_thread are not both True
if (all_threads && only_active_thread) {
@ -290,6 +311,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
self->native = native;
self->gc = gc;
self->opcodes = opcodes;
self->debug = debug;
self->only_active_thread = only_active_thread;
self->mode = mode;
@ -844,6 +866,14 @@ _remote_debugging_exec(PyObject *m)
return -1;
}
st->LocationInfo_Type = PyStructSequence_NewType(&LocationInfo_desc);
if (st->LocationInfo_Type == NULL) {
return -1;
}
if (PyModule_AddType(m, st->LocationInfo_Type) < 0) {
return -1;
}
st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc);
if (st->FrameInfo_Type == NULL) {
return -1;
@ -917,6 +947,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg)
RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
Py_VISIT(state->RemoteDebugging_Type);
Py_VISIT(state->TaskInfo_Type);
Py_VISIT(state->LocationInfo_Type);
Py_VISIT(state->FrameInfo_Type);
Py_VISIT(state->CoroInfo_Type);
Py_VISIT(state->ThreadInfo_Type);
@ -931,6 +962,7 @@ remote_debugging_clear(PyObject *mod)
RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
Py_CLEAR(state->RemoteDebugging_Type);
Py_CLEAR(state->TaskInfo_Type);
Py_CLEAR(state->LocationInfo_Type);
Py_CLEAR(state->FrameInfo_Type);
Py_CLEAR(state->CoroInfo_Type);
Py_CLEAR(state->ThreadInfo_Type);