diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h index c4547baf967..6726576d04f 100644 --- a/Modules/_remote_debugging/_remote_debugging.h +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -157,6 +157,7 @@ typedef struct { typedef struct { PyTypeObject *RemoteDebugging_Type; PyTypeObject *TaskInfo_Type; + PyTypeObject *LocationInfo_Type; PyTypeObject *FrameInfo_Type; PyTypeObject *CoroInfo_Type; PyTypeObject *ThreadInfo_Type; @@ -195,6 +196,7 @@ typedef struct { int skip_non_matching_threads; int native; int gc; + int opcodes; RemoteDebuggingState *cached_state; #ifdef Py_GIL_DISABLED uint32_t tlbc_generation; @@ -248,6 +250,7 @@ typedef int (*set_entry_processor_func)( * ============================================================================ */ extern PyStructSequence_Desc TaskInfo_desc; +extern PyStructSequence_Desc LocationInfo_desc; extern PyStructSequence_Desc FrameInfo_desc; extern PyStructSequence_Desc CoroInfo_desc; extern PyStructSequence_Desc ThreadInfo_desc; @@ -298,11 +301,20 @@ extern int parse_code_object( int32_t tlbc_index ); +extern PyObject *make_location_info( + RemoteUnwinderObject *unwinder, + int lineno, + int end_lineno, + int col_offset, + int end_col_offset +); + extern PyObject *make_frame_info( RemoteUnwinderObject *unwinder, PyObject *file, - PyObject *line, - PyObject *func + PyObject *location, // LocationInfo structseq or None for synthetic frames + PyObject *func, + PyObject *opcode ); /* Line table parsing */ diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h index 60adb357e32..d781f7f49be 100644 --- a/Modules/_remote_debugging/clinic/module.c.h +++ b/Modules/_remote_debugging/clinic/module.c.h @@ -12,7 +12,7 @@ preserve PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" " mode=0, debug=False, skip_non_matching_threads=True,\n" -" native=False, gc=False)\n" +" native=False, gc=False, opcodes=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -32,6 +32,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " non-Python code.\n" " gc: If True, include artificial \"\" frames to denote active garbage\n" " collection.\n" +" opcodes: If True, gather bytecode opcode information for instruction-level\n" +" profiling.\n" "\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "process, including examining thread states, stack frames and other runtime data.\n" @@ -48,7 +50,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int only_active_thread, int mode, int debug, int skip_non_matching_threads, - int native, int gc); + int native, int gc, + int opcodes); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -56,7 +59,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 8 + #define NUM_KEYWORDS 9 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -65,7 +68,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), }, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(opcodes), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -74,14 +77,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "opcodes", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[8]; + PyObject *argsbuf[9]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; @@ -93,6 +96,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int skip_non_matching_threads = 1; int native = 0; int gc = 0; + int opcodes = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -160,12 +164,21 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto skip_optional_kwonly; } } - gc = PyObject_IsTrue(fastargs[7]); - if (gc < 0) { + if (fastargs[7]) { + gc = PyObject_IsTrue(fastargs[7]); + if (gc < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + opcodes = PyObject_IsTrue(fastargs[8]); + if (opcodes < 0) { goto exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, opcodes); exit: return return_value; @@ -347,4 +360,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=946a0838197bf141 input=a9049054013a1b77]*/ diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c index ea3f00c802b..255a4f374f6 100644 --- a/Modules/_remote_debugging/code_objects.c +++ b/Modules/_remote_debugging/code_objects.c @@ -155,48 +155,45 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L { const uint8_t* ptr = (const uint8_t*)(linetable); uintptr_t addr = 0; - info->lineno = firstlineno; + int computed_line = firstlineno; // Running accumulator, separate from output while (*ptr != '\0') { - // See InternalDocs/code_objects.md for where these magic numbers are from - // and for the decoding algorithm. uint8_t first_byte = *(ptr++); uint8_t code = (first_byte >> 3) & 15; size_t length = (first_byte & 7) + 1; uintptr_t end_addr = addr + length; + switch (code) { - case PY_CODE_LOCATION_INFO_NONE: { + case PY_CODE_LOCATION_INFO_NONE: + info->lineno = info->end_lineno = -1; + info->column = info->end_column = -1; break; - } - case PY_CODE_LOCATION_INFO_LONG: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; - info->end_lineno = info->lineno + scan_varint(&ptr); + case PY_CODE_LOCATION_INFO_LONG: + computed_line += scan_signed_varint(&ptr); + info->lineno = computed_line; + info->end_lineno = computed_line + scan_varint(&ptr); info->column = scan_varint(&ptr) - 1; info->end_column = scan_varint(&ptr) - 1; break; - } - case PY_CODE_LOCATION_INFO_NO_COLUMNS: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; + case PY_CODE_LOCATION_INFO_NO_COLUMNS: + computed_line += scan_signed_varint(&ptr); + info->lineno = info->end_lineno = computed_line; info->column = info->end_column = -1; break; - } case PY_CODE_LOCATION_INFO_ONE_LINE0: case PY_CODE_LOCATION_INFO_ONE_LINE1: - case PY_CODE_LOCATION_INFO_ONE_LINE2: { - int line_delta = code - 10; - info->lineno += line_delta; - info->end_lineno = info->lineno; + case PY_CODE_LOCATION_INFO_ONE_LINE2: + computed_line += code - 10; + info->lineno = info->end_lineno = computed_line; info->column = *(ptr++); info->end_column = *(ptr++); break; - } default: { uint8_t second_byte = *(ptr++); if ((second_byte & 128) != 0) { return false; } + info->lineno = info->end_lineno = computed_line; info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -215,8 +212,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L * ============================================================================ */ PyObject * -make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, - PyObject *func) +make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno, + int col_offset, int end_col_offset) +{ + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *info = PyStructSequence_New(state->LocationInfo_Type); + if (info == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo"); + return NULL; + } + PyStructSequence_SetItem(info, 0, PyLong_FromLong(lineno)); + PyStructSequence_SetItem(info, 1, PyLong_FromLong(end_lineno)); + PyStructSequence_SetItem(info, 2, PyLong_FromLong(col_offset)); + PyStructSequence_SetItem(info, 3, PyLong_FromLong(end_col_offset)); + return info; +} + +PyObject * +make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location, + PyObject *func, PyObject *opcode) { RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); PyObject *info = PyStructSequence_New(state->FrameInfo_Type); @@ -225,11 +239,13 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, return NULL; } Py_INCREF(file); - Py_INCREF(line); + Py_INCREF(location); Py_INCREF(func); + Py_INCREF(opcode); PyStructSequence_SetItem(info, 0, file); - PyStructSequence_SetItem(info, 1, line); + PyStructSequence_SetItem(info, 1, location); PyStructSequence_SetItem(info, 2, func); + PyStructSequence_SetItem(info, 3, opcode); return info; } @@ -365,16 +381,43 @@ parse_code_object(RemoteUnwinderObject *unwinder, meta->first_lineno, &info); if (!ok) { info.lineno = -1; + info.end_lineno = -1; + info.column = -1; + info.end_column = -1; } - PyObject *lineno = PyLong_FromLong(info.lineno); - if (!lineno) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); + // Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset) + PyObject *location = make_location_info(unwinder, + info.lineno, + info.end_lineno, + info.column, + info.end_column); + if (!location) { goto error; } - PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); - Py_DECREF(lineno); + // Read the instruction opcode from target process if opcodes flag is set + PyObject *opcode_obj = NULL; + if (unwinder->opcodes) { + uint16_t instruction_word = 0; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip, + sizeof(uint16_t), &instruction_word) == 0) { + opcode_obj = PyLong_FromLong(instruction_word & 0xFF); + if (!opcode_obj) { + Py_DECREF(location); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object"); + goto error; + } + } else { + // Opcode read failed - clear the exception since opcode is optional + PyErr_Clear(); + } + } + + PyObject *tuple = make_frame_info(unwinder, meta->file_name, location, + meta->func_name, opcode_obj ? opcode_obj : Py_None); + Py_DECREF(location); + Py_XDECREF(opcode_obj); if (!tuple) { goto error; } diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c index d60caadcb9a..51e9cc93611 100644 --- a/Modules/_remote_debugging/frames.c +++ b/Modules/_remote_debugging/frames.c @@ -310,9 +310,9 @@ process_frame_chain( extra_frame = &_Py_STR(native); } if (extra_frame) { - // Use "~" as file and 0 as line, since that's what pstats uses: + // Use "~" as file, None as location (synthetic frame), None as opcode PyObject *extra_frame_info = make_frame_info( - unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame); + unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None); if (extra_frame_info == NULL) { return -1; } diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c index 252291f9162..221c1731733 100644 --- a/Modules/_remote_debugging/module.c +++ b/Modules/_remote_debugging/module.c @@ -28,11 +28,28 @@ PyStructSequence_Desc TaskInfo_desc = { 4 }; +// LocationInfo structseq type +static PyStructSequence_Field LocationInfo_fields[] = { + {"lineno", "Line number"}, + {"end_lineno", "End line number"}, + {"col_offset", "Column offset"}, + {"end_col_offset", "End column offset"}, + {NULL} +}; + +PyStructSequence_Desc LocationInfo_desc = { + "_remote_debugging.LocationInfo", + "Source location information: (lineno, end_lineno, col_offset, end_col_offset)", + LocationInfo_fields, + 4 +}; + // FrameInfo structseq type static PyStructSequence_Field FrameInfo_fields[] = { {"filename", "Source code filename"}, - {"lineno", "Line number"}, + {"location", "LocationInfo structseq or None for synthetic frames"}, {"funcname", "Function name"}, + {"opcode", "Opcode being executed (None if not gathered)"}, {NULL} }; @@ -40,7 +57,7 @@ PyStructSequence_Desc FrameInfo_desc = { "_remote_debugging.FrameInfo", "Information about a frame", FrameInfo_fields, - 3 + 4 }; // CoroInfo structseq type @@ -235,6 +252,7 @@ _remote_debugging.RemoteUnwinder.__init__ skip_non_matching_threads: bool = True native: bool = False gc: bool = False + opcodes: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -253,6 +271,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. non-Python code. gc: If True, include artificial "" frames to denote active garbage collection. + opcodes: If True, gather bytecode opcode information for instruction-level + profiling. The RemoteUnwinder provides functionality to inspect and debug a running Python process, including examining thread states, stack frames and other runtime data. @@ -270,8 +290,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int only_active_thread, int mode, int debug, int skip_non_matching_threads, - int native, int gc) -/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/ + int native, int gc, + int opcodes) +/*[clinic end generated code: output=e7f77865c7dd662f input=3dba9e3da913a1e0]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -290,6 +311,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, self->native = native; self->gc = gc; + self->opcodes = opcodes; self->debug = debug; self->only_active_thread = only_active_thread; self->mode = mode; @@ -844,6 +866,14 @@ _remote_debugging_exec(PyObject *m) return -1; } + st->LocationInfo_Type = PyStructSequence_NewType(&LocationInfo_desc); + if (st->LocationInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->LocationInfo_Type) < 0) { + return -1; + } + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); if (st->FrameInfo_Type == NULL) { return -1; @@ -917,6 +947,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_VISIT(state->RemoteDebugging_Type); Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->LocationInfo_Type); Py_VISIT(state->FrameInfo_Type); Py_VISIT(state->CoroInfo_Type); Py_VISIT(state->ThreadInfo_Type); @@ -931,6 +962,7 @@ remote_debugging_clear(PyObject *mod) RemoteDebuggingState *state = RemoteDebugging_GetState(mod); Py_CLEAR(state->RemoteDebugging_Type); Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->LocationInfo_Type); Py_CLEAR(state->FrameInfo_Type); Py_CLEAR(state->CoroInfo_Type); Py_CLEAR(state->ThreadInfo_Type);