Extend RemoteUnwinder to capture precise bytecode locations

Introduces a LocationInfo struct sequence with lineno, end_lineno, col_offset,
and end_col_offset fields. Adds an opcodes parameter to RemoteUnwinder that,
when true, captures the currently executing opcode alongside its source span.

Refactors linetable parsing to accumulate line numbers in a running counter
(computed_line) kept separate from the output values, fixing edge cases in
line-number computation.
This commit is contained in:
Pablo Galindo Salgado 2025-12-03 03:43:00 +00:00
parent 8801c6dec7
commit dd27e5e679
5 changed files with 146 additions and 46 deletions

View file

@ -157,6 +157,7 @@ typedef struct {
typedef struct { typedef struct {
PyTypeObject *RemoteDebugging_Type; PyTypeObject *RemoteDebugging_Type;
PyTypeObject *TaskInfo_Type; PyTypeObject *TaskInfo_Type;
PyTypeObject *LocationInfo_Type;
PyTypeObject *FrameInfo_Type; PyTypeObject *FrameInfo_Type;
PyTypeObject *CoroInfo_Type; PyTypeObject *CoroInfo_Type;
PyTypeObject *ThreadInfo_Type; PyTypeObject *ThreadInfo_Type;
@ -195,6 +196,7 @@ typedef struct {
int skip_non_matching_threads; int skip_non_matching_threads;
int native; int native;
int gc; int gc;
int opcodes;
RemoteDebuggingState *cached_state; RemoteDebuggingState *cached_state;
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
uint32_t tlbc_generation; uint32_t tlbc_generation;
@ -248,6 +250,7 @@ typedef int (*set_entry_processor_func)(
* ============================================================================ */ * ============================================================================ */
extern PyStructSequence_Desc TaskInfo_desc; extern PyStructSequence_Desc TaskInfo_desc;
extern PyStructSequence_Desc LocationInfo_desc;
extern PyStructSequence_Desc FrameInfo_desc; extern PyStructSequence_Desc FrameInfo_desc;
extern PyStructSequence_Desc CoroInfo_desc; extern PyStructSequence_Desc CoroInfo_desc;
extern PyStructSequence_Desc ThreadInfo_desc; extern PyStructSequence_Desc ThreadInfo_desc;
@ -298,11 +301,20 @@ extern int parse_code_object(
int32_t tlbc_index int32_t tlbc_index
); );
extern PyObject *make_location_info(
RemoteUnwinderObject *unwinder,
int lineno,
int end_lineno,
int col_offset,
int end_col_offset
);
extern PyObject *make_frame_info( extern PyObject *make_frame_info(
RemoteUnwinderObject *unwinder, RemoteUnwinderObject *unwinder,
PyObject *file, PyObject *file,
PyObject *line, PyObject *location, // LocationInfo structseq or None for synthetic frames
PyObject *func PyObject *func,
PyObject *opcode
); );
/* Line table parsing */ /* Line table parsing */

View file

@ -12,7 +12,7 @@ preserve
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n"
" mode=0, debug=False, skip_non_matching_threads=True,\n" " mode=0, debug=False, skip_non_matching_threads=True,\n"
" native=False, gc=False)\n" " native=False, gc=False, opcodes=False)\n"
"--\n" "--\n"
"\n" "\n"
"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n"
@ -32,6 +32,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
" non-Python code.\n" " non-Python code.\n"
" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n" " gc: If True, include artificial \"<GC>\" frames to denote active garbage\n"
" collection.\n" " collection.\n"
" opcodes: If True, gather bytecode opcode information for instruction-level\n"
" profiling.\n"
"\n" "\n"
"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n"
"process, including examining thread states, stack frames and other runtime data.\n" "process, including examining thread states, stack frames and other runtime data.\n"
@ -48,7 +50,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int only_active_thread, int only_active_thread,
int mode, int debug, int mode, int debug,
int skip_non_matching_threads, int skip_non_matching_threads,
int native, int gc); int native, int gc,
int opcodes);
static int static int
_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs)
@ -56,7 +59,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int return_value = -1; int return_value = -1;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 8 #define NUM_KEYWORDS 9
static struct { static struct {
PyGC_Head _this_is_not_used; PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD PyObject_VAR_HEAD
@ -65,7 +68,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
} _kwtuple = { } _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1, .ob_hash = -1,
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), }, .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(opcodes), },
}; };
#undef NUM_KEYWORDS #undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base) #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@ -74,14 +77,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
# define KWTUPLE NULL # define KWTUPLE NULL
#endif // !Py_BUILD_CORE #endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL}; static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "opcodes", NULL};
static _PyArg_Parser _parser = { static _PyArg_Parser _parser = {
.keywords = _keywords, .keywords = _keywords,
.fname = "RemoteUnwinder", .fname = "RemoteUnwinder",
.kwtuple = KWTUPLE, .kwtuple = KWTUPLE,
}; };
#undef KWTUPLE #undef KWTUPLE
PyObject *argsbuf[8]; PyObject *argsbuf[9];
PyObject * const *fastargs; PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
@ -93,6 +96,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int skip_non_matching_threads = 1; int skip_non_matching_threads = 1;
int native = 0; int native = 0;
int gc = 0; int gc = 0;
int opcodes = 0;
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
@ -160,12 +164,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
goto skip_optional_kwonly; goto skip_optional_kwonly;
} }
} }
if (fastargs[7]) {
gc = PyObject_IsTrue(fastargs[7]); gc = PyObject_IsTrue(fastargs[7]);
if (gc < 0) { if (gc < 0) {
goto exit; goto exit;
} }
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
opcodes = PyObject_IsTrue(fastargs[8]);
if (opcodes < 0) {
goto exit;
}
skip_optional_kwonly: skip_optional_kwonly:
return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc); return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, opcodes);
exit: exit:
return return_value; return return_value;
@ -347,4 +360,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject
return return_value; return return_value;
} }
/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/ /*[clinic end generated code: output=946a0838197bf141 input=a9049054013a1b77]*/

View file

@ -155,48 +155,45 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
{ {
const uint8_t* ptr = (const uint8_t*)(linetable); const uint8_t* ptr = (const uint8_t*)(linetable);
uintptr_t addr = 0; uintptr_t addr = 0;
info->lineno = firstlineno; int computed_line = firstlineno; // Running accumulator, separate from output
while (*ptr != '\0') { while (*ptr != '\0') {
// See InternalDocs/code_objects.md for where these magic numbers are from
// and for the decoding algorithm.
uint8_t first_byte = *(ptr++); uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15; uint8_t code = (first_byte >> 3) & 15;
size_t length = (first_byte & 7) + 1; size_t length = (first_byte & 7) + 1;
uintptr_t end_addr = addr + length; uintptr_t end_addr = addr + length;
switch (code) { switch (code) {
case PY_CODE_LOCATION_INFO_NONE: { case PY_CODE_LOCATION_INFO_NONE:
info->lineno = info->end_lineno = -1;
info->column = info->end_column = -1;
break; break;
} case PY_CODE_LOCATION_INFO_LONG:
case PY_CODE_LOCATION_INFO_LONG: { computed_line += scan_signed_varint(&ptr);
int line_delta = scan_signed_varint(&ptr); info->lineno = computed_line;
info->lineno += line_delta; info->end_lineno = computed_line + scan_varint(&ptr);
info->end_lineno = info->lineno + scan_varint(&ptr);
info->column = scan_varint(&ptr) - 1; info->column = scan_varint(&ptr) - 1;
info->end_column = scan_varint(&ptr) - 1; info->end_column = scan_varint(&ptr) - 1;
break; break;
} case PY_CODE_LOCATION_INFO_NO_COLUMNS:
case PY_CODE_LOCATION_INFO_NO_COLUMNS: { computed_line += scan_signed_varint(&ptr);
int line_delta = scan_signed_varint(&ptr); info->lineno = info->end_lineno = computed_line;
info->lineno += line_delta;
info->column = info->end_column = -1; info->column = info->end_column = -1;
break; break;
}
case PY_CODE_LOCATION_INFO_ONE_LINE0: case PY_CODE_LOCATION_INFO_ONE_LINE0:
case PY_CODE_LOCATION_INFO_ONE_LINE1: case PY_CODE_LOCATION_INFO_ONE_LINE1:
case PY_CODE_LOCATION_INFO_ONE_LINE2: { case PY_CODE_LOCATION_INFO_ONE_LINE2:
int line_delta = code - 10; computed_line += code - 10;
info->lineno += line_delta; info->lineno = info->end_lineno = computed_line;
info->end_lineno = info->lineno;
info->column = *(ptr++); info->column = *(ptr++);
info->end_column = *(ptr++); info->end_column = *(ptr++);
break; break;
}
default: { default: {
uint8_t second_byte = *(ptr++); uint8_t second_byte = *(ptr++);
if ((second_byte & 128) != 0) { if ((second_byte & 128) != 0) {
return false; return false;
} }
info->lineno = info->end_lineno = computed_line;
info->column = code << 3 | (second_byte >> 4); info->column = code << 3 | (second_byte >> 4);
info->end_column = info->column + (second_byte & 15); info->end_column = info->column + (second_byte & 15);
break; break;
@ -215,8 +212,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
* ============================================================================ */ * ============================================================================ */
PyObject * PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno,
PyObject *func) int col_offset, int end_col_offset)
{
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->LocationInfo_Type);
if (info == NULL) {
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo");
return NULL;
}
PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno));
PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno));
PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset));
PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset));
return info;
}
PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location,
PyObject *func, PyObject *opcode)
{ {
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->FrameInfo_Type); PyObject *info = PyStructSequence_New(state->FrameInfo_Type);
@ -225,11 +239,13 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
return NULL; return NULL;
} }
Py_INCREF(file); Py_INCREF(file);
Py_INCREF(line); Py_INCREF(location);
Py_INCREF(func); Py_INCREF(func);
Py_INCREF(opcode);
PyStructSequence_SetItem(info, 0, file); PyStructSequence_SetItem(info, 0, file);
PyStructSequence_SetItem(info, 1, line); PyStructSequence_SetItem(info, 1, location);
PyStructSequence_SetItem(info, 2, func); PyStructSequence_SetItem(info, 2, func);
PyStructSequence_SetItem(info, 3, opcode);
return info; return info;
} }
@ -365,16 +381,43 @@ parse_code_object(RemoteUnwinderObject *unwinder,
meta->first_lineno, &info); meta->first_lineno, &info);
if (!ok) { if (!ok) {
info.lineno = -1; info.lineno = -1;
info.end_lineno = -1;
info.column = -1;
info.end_column = -1;
} }
PyObject *lineno = PyLong_FromLong(info.lineno); // Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset)
if (!lineno) { PyObject *location = make_location_info(unwinder,
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); info.lineno,
info.end_lineno,
info.column,
info.end_column);
if (!location) {
goto error; goto error;
} }
PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); // Read the instruction opcode from target process if opcodes flag is set
Py_DECREF(lineno); PyObject *opcode_obj = NULL;
if (unwinder->opcodes) {
uint16_t instruction_word = 0;
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip,
sizeof(uint16_t), &instruction_word) == 0) {
opcode_obj = PyLong_FromLong(instruction_word & 0xFF);
if (!opcode_obj) {
Py_DECREF(location);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object");
goto error;
}
} else {
// Opcode read failed - clear the exception since opcode is optional
PyErr_Clear();
}
}
PyObject *tuple = make_frame_info(unwinder, meta->file_name, location,
meta->func_name, opcode_obj ? opcode_obj : Py_None);
Py_DECREF(location);
Py_XDECREF(opcode_obj);
if (!tuple) { if (!tuple) {
goto error; goto error;
} }

View file

@ -310,9 +310,9 @@ process_frame_chain(
extra_frame = &_Py_STR(native); extra_frame = &_Py_STR(native);
} }
if (extra_frame) { if (extra_frame) {
// Use "~" as file and 0 as line, since that's what pstats uses: // Use "~" as file, None as location (synthetic frame), None as opcode
PyObject *extra_frame_info = make_frame_info( PyObject *extra_frame_info = make_frame_info(
unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame); unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None);
if (extra_frame_info == NULL) { if (extra_frame_info == NULL) {
return -1; return -1;
} }

View file

@ -28,11 +28,28 @@ PyStructSequence_Desc TaskInfo_desc = {
4 4
}; };
// LocationInfo structseq type
static PyStructSequence_Field LocationInfo_fields[] = {
{"lineno", "Line number"},
{"end_lineno", "End line number"},
{"col_offset", "Column offset"},
{"end_col_offset", "End column offset"},
{NULL}
};
PyStructSequence_Desc LocationInfo_desc = {
"_remote_debugging.LocationInfo",
"Source location information: (lineno, end_lineno, col_offset, end_col_offset)",
LocationInfo_fields,
4
};
// FrameInfo structseq type // FrameInfo structseq type
static PyStructSequence_Field FrameInfo_fields[] = { static PyStructSequence_Field FrameInfo_fields[] = {
{"filename", "Source code filename"}, {"filename", "Source code filename"},
{"lineno", "Line number"}, {"location", "LocationInfo structseq or None for synthetic frames"},
{"funcname", "Function name"}, {"funcname", "Function name"},
{"opcode", "Opcode being executed (None if not gathered)"},
{NULL} {NULL}
}; };
@ -40,7 +57,7 @@ PyStructSequence_Desc FrameInfo_desc = {
"_remote_debugging.FrameInfo", "_remote_debugging.FrameInfo",
"Information about a frame", "Information about a frame",
FrameInfo_fields, FrameInfo_fields,
3 4
}; };
// CoroInfo structseq type // CoroInfo structseq type
@ -235,6 +252,7 @@ _remote_debugging.RemoteUnwinder.__init__
skip_non_matching_threads: bool = True skip_non_matching_threads: bool = True
native: bool = False native: bool = False
gc: bool = False gc: bool = False
opcodes: bool = False
Initialize a new RemoteUnwinder object for debugging a remote Python process. Initialize a new RemoteUnwinder object for debugging a remote Python process.
@ -253,6 +271,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
non-Python code. non-Python code.
gc: If True, include artificial "<GC>" frames to denote active garbage gc: If True, include artificial "<GC>" frames to denote active garbage
collection. collection.
opcodes: If True, gather bytecode opcode information for instruction-level
profiling.
The RemoteUnwinder provides functionality to inspect and debug a running Python The RemoteUnwinder provides functionality to inspect and debug a running Python
process, including examining thread states, stack frames and other runtime data. process, including examining thread states, stack frames and other runtime data.
@ -270,8 +290,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int only_active_thread, int only_active_thread,
int mode, int debug, int mode, int debug,
int skip_non_matching_threads, int skip_non_matching_threads,
int native, int gc) int native, int gc,
/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/ int opcodes)
/*[clinic end generated code: output=e7f77865c7dd662f input=3dba9e3da913a1e0]*/
{ {
// Validate that all_threads and only_active_thread are not both True // Validate that all_threads and only_active_thread are not both True
if (all_threads && only_active_thread) { if (all_threads && only_active_thread) {
@ -290,6 +311,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
self->native = native; self->native = native;
self->gc = gc; self->gc = gc;
self->opcodes = opcodes;
self->debug = debug; self->debug = debug;
self->only_active_thread = only_active_thread; self->only_active_thread = only_active_thread;
self->mode = mode; self->mode = mode;
@ -844,6 +866,14 @@ _remote_debugging_exec(PyObject *m)
return -1; return -1;
} }
st->LocationInfo_Type = PyStructSequence_NewType(&LocationInfo_desc);
if (st->LocationInfo_Type == NULL) {
return -1;
}
if (PyModule_AddType(m, st->LocationInfo_Type) < 0) {
return -1;
}
st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc);
if (st->FrameInfo_Type == NULL) { if (st->FrameInfo_Type == NULL) {
return -1; return -1;
@ -917,6 +947,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg)
RemoteDebuggingState *state = RemoteDebugging_GetState(mod); RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
Py_VISIT(state->RemoteDebugging_Type); Py_VISIT(state->RemoteDebugging_Type);
Py_VISIT(state->TaskInfo_Type); Py_VISIT(state->TaskInfo_Type);
Py_VISIT(state->LocationInfo_Type);
Py_VISIT(state->FrameInfo_Type); Py_VISIT(state->FrameInfo_Type);
Py_VISIT(state->CoroInfo_Type); Py_VISIT(state->CoroInfo_Type);
Py_VISIT(state->ThreadInfo_Type); Py_VISIT(state->ThreadInfo_Type);
@ -931,6 +962,7 @@ remote_debugging_clear(PyObject *mod)
RemoteDebuggingState *state = RemoteDebugging_GetState(mod); RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
Py_CLEAR(state->RemoteDebugging_Type); Py_CLEAR(state->RemoteDebugging_Type);
Py_CLEAR(state->TaskInfo_Type); Py_CLEAR(state->TaskInfo_Type);
Py_CLEAR(state->LocationInfo_Type);
Py_CLEAR(state->FrameInfo_Type); Py_CLEAR(state->FrameInfo_Type);
Py_CLEAR(state->CoroInfo_Type); Py_CLEAR(state->CoroInfo_Type);
Py_CLEAR(state->ThreadInfo_Type); Py_CLEAR(state->ThreadInfo_Type);