From dd27e5e67985adb47f9089a854b61f0d6b9934ff Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 3 Dec 2025 03:43:00 +0000 Subject: [PATCH] Extend RemoteUnwinder to capture precise bytecode locations Introduces LocationInfo struct sequence with end_lineno, col_offset, and end_col_offset fields. Adds opcodes parameter to RemoteUnwinder that extracts the currently executing opcode alongside its source span. Refactors linetable parsing to correctly accumulate line numbers separately from output values, fixing edge cases in computed_line. --- Modules/_remote_debugging/_remote_debugging.h | 16 ++- Modules/_remote_debugging/clinic/module.c.h | 33 +++++-- Modules/_remote_debugging/code_objects.c | 99 +++++++++++++------ Modules/_remote_debugging/frames.c | 4 +- Modules/_remote_debugging/module.c | 40 +++++++- 5 files changed, 146 insertions(+), 46 deletions(-) diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index c4547baf967..6726576d04f 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -157,6 +157,7 @@ typedef struct { typedef struct { PyTypeObject *RemoteDebugging_Type; PyTypeObject *TaskInfo_Type; + PyTypeObject *LocationInfo_Type; PyTypeObject *FrameInfo_Type; PyTypeObject *CoroInfo_Type; PyTypeObject *ThreadInfo_Type; @@ -195,6 +196,7 @@ typedef struct { int skip_non_matching_threads; int native; int gc; + int opcodes; RemoteDebuggingState *cached_state; #ifdef Py_GIL_DISABLED uint32_t tlbc_generation; @@ -248,6 +250,7 @@ typedef int (*set_entry_processor_func)( * ============================================================================ */ extern PyStructSequence_Desc TaskInfo_desc; +extern PyStructSequence_Desc LocationInfo_desc; extern PyStructSequence_Desc FrameInfo_desc; extern PyStructSequence_Desc CoroInfo_desc; extern PyStructSequence_Desc ThreadInfo_desc; @@ -298,11 +301,20 @@ extern int parse_code_object( int32_t tlbc_index ); +extern PyObject *make_location_info( + RemoteUnwinderObject *unwinder, + int lineno, + int end_lineno, + int col_offset, + int end_col_offset +); + extern PyObject *make_frame_info( RemoteUnwinderObject *unwinder, PyObject *file, - PyObject *line, - PyObject *func + PyObject *location, // LocationInfo structseq or None for synthetic frames + PyObject *func, + PyObject *opcode ); /* Line table parsing */ diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h index 60adb357e32..d781f7f49be 100644 --- a/Modules/_remote_debugging/clinic/module.c.h +++ b/Modules/_remote_debugging/clinic/module.c.h @@ -12,7 +12,7 @@ preserve PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" " mode=0, debug=False, skip_non_matching_threads=True,\n" -" native=False, gc=False)\n" +" native=False, gc=False, opcodes=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -32,6 +32,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " non-Python code.\n" " gc: If True, include artificial \"\" frames to denote active garbage\n" " collection.\n" +" opcodes: If True, gather bytecode opcode information for instruction-level\n" +" profiling.\n" "\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "process, including examining thread states, stack frames and other runtime data.\n" @@ -48,7 +50,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int only_active_thread, int mode, int debug, int skip_non_matching_threads, - int native, int gc); + int native, int gc, + int opcodes); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -56,7 +59,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 8 + #define NUM_KEYWORDS 9 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -65,7 +68,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(opcodes), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -74,14 +77,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "opcodes", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[8]; + PyObject *argsbuf[9]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; @@ -93,6 +96,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int skip_non_matching_threads = 1; int native = 0; int gc = 0; + int opcodes = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -160,12 +164,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto skip_optional_kwonly; } } - gc = PyObject_IsTrue(fastargs[7]); - if (gc < 0) { + if (fastargs[7]) { + gc = PyObject_IsTrue(fastargs[7]); + if (gc < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + opcodes = PyObject_IsTrue(fastargs[8]); + if (opcodes < 0) { goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, opcodes); exit: return return_value; @@ -347,4 +360,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=946a0838197bf141 input=a9049054013a1b77]*/ diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index ea3f00c802b..255a4f374f6 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -155,48 +155,45 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L { const uint8_t* ptr = (const uint8_t*)(linetable); uintptr_t addr = 0; - info->lineno = firstlineno; + int computed_line = firstlineno; // Running accumulator, separate from output while (*ptr != '\0') { - // See InternalDocs/code_objects.md for where these magic numbers are from - // and for the decoding algorithm. uint8_t first_byte = *(ptr++); uint8_t code = (first_byte >> 3) & 15; size_t length = (first_byte & 7) + 1; uintptr_t end_addr = addr + length; + switch (code) { - case PY_CODE_LOCATION_INFO_NONE: { + case PY_CODE_LOCATION_INFO_NONE: + info->lineno = info->end_lineno = -1; + info->column = info->end_column = -1; break; - } - case PY_CODE_LOCATION_INFO_LONG: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; - info->end_lineno = info->lineno + scan_varint(&ptr); + case PY_CODE_LOCATION_INFO_LONG: + computed_line += scan_signed_varint(&ptr); + info->lineno = computed_line; + info->end_lineno = computed_line + scan_varint(&ptr); info->column = scan_varint(&ptr) - 1; info->end_column = scan_varint(&ptr) - 1; break; - } - case PY_CODE_LOCATION_INFO_NO_COLUMNS: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + computed_line += scan_signed_varint(&ptr); + info->lineno = info->end_lineno = computed_line; info->column = info->end_column = -1; break; - } case PY_CODE_LOCATION_INFO_ONE_LINE0: case PY_CODE_LOCATION_INFO_ONE_LINE1: - case PY_CODE_LOCATION_INFO_ONE_LINE2: { - int line_delta = code - 10; - info->lineno += line_delta; - info->end_lineno = info->lineno; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + computed_line += code - 10; + info->lineno = info->end_lineno = computed_line; info->column = *(ptr++); info->end_column = *(ptr++); break; - } default: { uint8_t second_byte = *(ptr++); if ((second_byte & 128) != 0) { return false; } + info->lineno = info->end_lineno = computed_line; info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -215,8 +212,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L * ============================================================================ */ PyObject * -make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, - PyObject *func) +make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno, + int col_offset, int end_col_offset) +{ + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *info = PyStructSequence_New(state->LocationInfo_Type); + if (info == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo"); + return NULL; + } + PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno)); + PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno)); + PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset)); + PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset)); + return info; +} + +PyObject * +make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location, + PyObject *func, PyObject *opcode) { RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); PyObject *info = PyStructSequence_New(state->FrameInfo_Type); @@ -225,11 +239,13 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, return NULL; } Py_INCREF(file); - Py_INCREF(line); + Py_INCREF(location); Py_INCREF(func); + Py_INCREF(opcode); PyStructSequence_SetItem(info, 0, file); - PyStructSequence_SetItem(info, 1, line); + PyStructSequence_SetItem(info, 1, location); PyStructSequence_SetItem(info, 2, func); + PyStructSequence_SetItem(info, 3, opcode); return info; } @@ -365,16 +381,43 @@ parse_code_object(RemoteUnwinderObject *unwinder, meta->first_lineno, &info); if (!ok) { info.lineno = -1; + info.end_lineno = -1; + info.column = -1; + info.end_column = -1; } - PyObject *lineno = PyLong_FromLong(info.lineno); - if (!lineno) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); + // Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset) + PyObject *location = make_location_info(unwinder, + info.lineno, + info.end_lineno, + info.column, + info.end_column); + if (!location) { goto error; } - PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); - Py_DECREF(lineno); + // Read the instruction opcode from target process if opcodes flag is set + PyObject *opcode_obj = NULL; + if (unwinder->opcodes) { + uint16_t instruction_word = 0; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip, + sizeof(uint16_t), &instruction_word) == 0) { + opcode_obj = PyLong_FromLong(instruction_word & 0xFF); + if (!opcode_obj) { + Py_DECREF(location); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object"); + goto error; + } + } else { + // Opcode read failed - clear the exception since opcode is optional + PyErr_Clear(); + } + } + + PyObject *tuple = make_frame_info(unwinder, meta->file_name, location, + meta->func_name, opcode_obj ? opcode_obj : Py_None); + Py_DECREF(location); + Py_XDECREF(opcode_obj); if (!tuple) { goto error; } diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index d60caadcb9a..51e9cc93611 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -310,9 +310,9 @@ process_frame_chain( extra_frame = &_Py_STR(native); } if (extra_frame) { - // Use "~" as file and 0 as line, since that's what pstats uses: + // Use "~" as file, None as location (synthetic frame), None as opcode PyObject *extra_frame_info = make_frame_info( - unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame); + unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None); if (extra_frame_info == NULL) { return -1; } diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index 252291f9162..221c1731733 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -28,11 +28,28 @@ PyStructSequence_Desc TaskInfo_desc = { 4 }; +// LocationInfo structseq type +static PyStructSequence_Field LocationInfo_fields[] = { + {"lineno", "Line number"}, + {"end_lineno", "End line number"}, + {"col_offset", "Column offset"}, + {"end_col_offset", "End column offset"}, + {NULL} +}; + +PyStructSequence_Desc LocationInfo_desc = { + "_remote_debugging.LocationInfo", + "Source location information: (lineno, end_lineno, col_offset, end_col_offset)", + LocationInfo_fields, + 4 +}; + // FrameInfo structseq type static PyStructSequence_Field FrameInfo_fields[] = { {"filename", "Source code filename"}, - {"lineno", "Line number"}, + {"location", "LocationInfo structseq or None for synthetic frames"}, {"funcname", "Function name"}, + {"opcode", "Opcode being executed (None if not gathered)"}, {NULL} }; @@ -40,7 +57,7 @@ PyStructSequence_Desc FrameInfo_desc = { "_remote_debugging.FrameInfo", "Information about a frame", FrameInfo_fields, - 3 + 4 }; // CoroInfo structseq type @@ -235,6 +252,7 @@ _remote_debugging.RemoteUnwinder.__init__ skip_non_matching_threads: bool = True native: bool = False gc: bool = False + opcodes: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -253,6 +271,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. non-Python code. gc: If True, include artificial "" frames to denote active garbage collection. + opcodes: If True, gather bytecode opcode information for instruction-level + profiling. The RemoteUnwinder provides functionality to inspect and debug a running Python process, including examining thread states, stack frames and other runtime data. @@ -270,8 +290,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int only_active_thread, int mode, int debug, int skip_non_matching_threads, - int native, int gc) -/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/ + int native, int gc, + int opcodes) +/*[clinic end generated code: output=e7f77865c7dd662f input=3dba9e3da913a1e0]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -290,6 +311,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, self->native = native; self->gc = gc; + self->opcodes = opcodes; self->debug = debug; self->only_active_thread = only_active_thread; self->mode = mode; @@ -844,6 +866,14 @@ _remote_debugging_exec(PyObject *m) return -1; } + st->LocationInfo_Type = PyStructSequence_NewType(&LocationInfo_desc); + if (st->LocationInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->LocationInfo_Type) < 0) { + return -1; + } + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); if (st->FrameInfo_Type == NULL) { return -1; @@ -917,6 +947,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_VISIT(state->RemoteDebugging_Type); Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->LocationInfo_Type); Py_VISIT(state->FrameInfo_Type); Py_VISIT(state->CoroInfo_Type); Py_VISIT(state->ThreadInfo_Type); @@ -931,6 +962,7 @@ remote_debugging_clear(PyObject *mod) RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_CLEAR(state->RemoteDebugging_Type); Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->LocationInfo_Type); Py_CLEAR(state->FrameInfo_Type); Py_CLEAR(state->CoroInfo_Type); Py_CLEAR(state->ThreadInfo_Type);