gh-138122: Make the tachyon profiler opcode-aware (#142394)

This commit is contained in:
Pablo Galindo Salgado 2025-12-11 03:41:47 +00:00 committed by GitHub
parent fa448451ab
commit 5b19c75b47
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
36 changed files with 3983 additions and 507 deletions

View file

@ -190,6 +190,7 @@ typedef struct {
typedef struct {
PyTypeObject *RemoteDebugging_Type;
PyTypeObject *TaskInfo_Type;
PyTypeObject *LocationInfo_Type;
PyTypeObject *FrameInfo_Type;
PyTypeObject *CoroInfo_Type;
PyTypeObject *ThreadInfo_Type;
@ -228,6 +229,7 @@ typedef struct {
int skip_non_matching_threads;
int native;
int gc;
int opcodes;
int cache_frames;
int collect_stats; // whether to collect statistics
uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale
@ -286,6 +288,7 @@ typedef int (*set_entry_processor_func)(
* ============================================================================ */
extern PyStructSequence_Desc TaskInfo_desc;
extern PyStructSequence_Desc LocationInfo_desc;
extern PyStructSequence_Desc FrameInfo_desc;
extern PyStructSequence_Desc CoroInfo_desc;
extern PyStructSequence_Desc ThreadInfo_desc;
@ -336,11 +339,20 @@ extern int parse_code_object(
int32_t tlbc_index
);
/* Build a LocationInfo struct sequence from four integers, stored in the
 * order (lineno, end_lineno, col_offset, end_col_offset).
 * Returns the new object, or NULL on failure. */
extern PyObject *make_location_info(
RemoteUnwinderObject *unwinder,
int lineno,
int end_lineno,
int col_offset,
int end_col_offset
);
extern PyObject *make_frame_info(
RemoteUnwinderObject *unwinder,
PyObject *file,
PyObject *line,
PyObject *func
PyObject *location, // LocationInfo structseq or None for synthetic frames
PyObject *func,
PyObject *opcode
);
/* Line table parsing */

View file

@ -12,7 +12,8 @@ preserve
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n"
" mode=0, debug=False, skip_non_matching_threads=True,\n"
" native=False, gc=False, cache_frames=False, stats=False)\n"
" native=False, gc=False, opcodes=False,\n"
" cache_frames=False, stats=False)\n"
"--\n"
"\n"
"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n"
@ -32,6 +33,8 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
" non-Python code.\n"
" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n"
" collection.\n"
" opcodes: If True, gather bytecode opcode information for instruction-level\n"
" profiling.\n"
" cache_frames: If True, enable frame caching optimization to avoid re-reading\n"
" unchanged parent frames between samples.\n"
" stats: If True, collect statistics about cache hits, memory reads, etc.\n"
@ -53,7 +56,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int mode, int debug,
int skip_non_matching_threads,
int native, int gc,
int cache_frames, int stats);
int opcodes, int cache_frames,
int stats);
static int
_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs)
@ -61,7 +65,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int return_value = -1;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 10
#define NUM_KEYWORDS 11
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
@ -70,7 +74,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(cache_frames), &_Py_ID(stats), },
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(opcodes), &_Py_ID(cache_frames), &_Py_ID(stats), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
@ -79,14 +83,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "cache_frames", "stats", NULL};
static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "opcodes", "cache_frames", "stats", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "RemoteUnwinder",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[10];
PyObject *argsbuf[11];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
@ -98,6 +102,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int skip_non_matching_threads = 1;
int native = 0;
int gc = 0;
int opcodes = 0;
int cache_frames = 0;
int stats = 0;
@ -177,7 +182,16 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
}
}
if (fastargs[8]) {
cache_frames = PyObject_IsTrue(fastargs[8]);
opcodes = PyObject_IsTrue(fastargs[8]);
if (opcodes < 0) {
goto exit;
}
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
if (fastargs[9]) {
cache_frames = PyObject_IsTrue(fastargs[9]);
if (cache_frames < 0) {
goto exit;
}
@ -185,12 +199,12 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
goto skip_optional_kwonly;
}
}
stats = PyObject_IsTrue(fastargs[9]);
stats = PyObject_IsTrue(fastargs[10]);
if (stats < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, cache_frames, stats);
return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, opcodes, cache_frames, stats);
exit:
return return_value;
@ -419,4 +433,4 @@ _remote_debugging_RemoteUnwinder_get_stats(PyObject *self, PyObject *Py_UNUSED(i
return return_value;
}
/*[clinic end generated code: output=f1fd6c1d4c4c7254 input=a9049054013a1b77]*/
/*[clinic end generated code: output=1943fb7a56197e39 input=a9049054013a1b77]*/

View file

@ -155,48 +155,45 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
{
const uint8_t* ptr = (const uint8_t*)(linetable);
uintptr_t addr = 0;
info->lineno = firstlineno;
int computed_line = firstlineno; // Running accumulator, separate from output
while (*ptr != '\0') {
// See InternalDocs/code_objects.md for where these magic numbers are from
// and for the decoding algorithm.
uint8_t first_byte = *(ptr++);
uint8_t code = (first_byte >> 3) & 15;
size_t length = (first_byte & 7) + 1;
uintptr_t end_addr = addr + length;
switch (code) {
case PY_CODE_LOCATION_INFO_NONE: {
case PY_CODE_LOCATION_INFO_NONE:
info->lineno = info->end_lineno = -1;
info->column = info->end_column = -1;
break;
}
case PY_CODE_LOCATION_INFO_LONG: {
int line_delta = scan_signed_varint(&ptr);
info->lineno += line_delta;
info->end_lineno = info->lineno + scan_varint(&ptr);
case PY_CODE_LOCATION_INFO_LONG:
computed_line += scan_signed_varint(&ptr);
info->lineno = computed_line;
info->end_lineno = computed_line + scan_varint(&ptr);
info->column = scan_varint(&ptr) - 1;
info->end_column = scan_varint(&ptr) - 1;
break;
}
case PY_CODE_LOCATION_INFO_NO_COLUMNS: {
int line_delta = scan_signed_varint(&ptr);
info->lineno += line_delta;
case PY_CODE_LOCATION_INFO_NO_COLUMNS:
computed_line += scan_signed_varint(&ptr);
info->lineno = info->end_lineno = computed_line;
info->column = info->end_column = -1;
break;
}
case PY_CODE_LOCATION_INFO_ONE_LINE0:
case PY_CODE_LOCATION_INFO_ONE_LINE1:
case PY_CODE_LOCATION_INFO_ONE_LINE2: {
int line_delta = code - 10;
info->lineno += line_delta;
info->end_lineno = info->lineno;
case PY_CODE_LOCATION_INFO_ONE_LINE2:
computed_line += code - 10;
info->lineno = info->end_lineno = computed_line;
info->column = *(ptr++);
info->end_column = *(ptr++);
break;
}
default: {
uint8_t second_byte = *(ptr++);
if ((second_byte & 128) != 0) {
return false;
}
info->lineno = info->end_lineno = computed_line;
info->column = code << 3 | (second_byte >> 4);
info->end_column = info->column + (second_byte & 15);
break;
@ -215,8 +212,50 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
* ============================================================================ */
PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
PyObject *func)
make_location_info(RemoteUnwinderObject *unwinder, int lineno, int end_lineno,
                   int col_offset, int end_col_offset)
{
    /* Creates a LocationInfo struct sequence whose slots 0..3 hold
     * lineno, end_lineno, col_offset and end_col_offset as Python ints.
     * Returns the new object, or NULL if any allocation fails. */
    RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
    PyObject *location = PyStructSequence_New(state->LocationInfo_Type);
    if (location == NULL) {
        set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create LocationInfo");
        return NULL;
    }

    // Slot order must match the field order of the LocationInfo type.
    const int values[] = {lineno, end_lineno, col_offset, end_col_offset};
    const Py_ssize_t count = (Py_ssize_t)(sizeof(values) / sizeof(values[0]));

    for (Py_ssize_t slot = 0; slot < count; slot++) {
        PyObject *number = PyLong_FromLong(values[slot]);
        if (number == NULL) {
            // Dropping the partially filled struct sequence releases the
            // slots that were already stored.
            Py_DECREF(location);
            return NULL;
        }
        PyStructSequence_SetItem(location, slot, number);  // steals reference
    }

    return location;
}
PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *location,
PyObject *func, PyObject *opcode)
{
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
PyObject *info = PyStructSequence_New(state->FrameInfo_Type);
@ -225,11 +264,13 @@ make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
return NULL;
}
Py_INCREF(file);
Py_INCREF(line);
Py_INCREF(location);
Py_INCREF(func);
Py_INCREF(opcode);
PyStructSequence_SetItem(info, 0, file);
PyStructSequence_SetItem(info, 1, line);
PyStructSequence_SetItem(info, 1, location);
PyStructSequence_SetItem(info, 2, func);
PyStructSequence_SetItem(info, 3, opcode);
return info;
}
@ -370,16 +411,43 @@ parse_code_object(RemoteUnwinderObject *unwinder,
meta->first_lineno, &info);
if (!ok) {
info.lineno = -1;
info.end_lineno = -1;
info.column = -1;
info.end_column = -1;
}
PyObject *lineno = PyLong_FromLong(info.lineno);
if (!lineno) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object");
// Create the LocationInfo structseq: (lineno, end_lineno, col_offset, end_col_offset)
PyObject *location = make_location_info(unwinder,
info.lineno,
info.end_lineno,
info.column,
info.end_column);
if (!location) {
goto error;
}
PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name);
Py_DECREF(lineno);
// Read the instruction opcode from target process if opcodes flag is set
PyObject *opcode_obj = NULL;
if (unwinder->opcodes) {
uint16_t instruction_word = 0;
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, ip,
sizeof(uint16_t), &instruction_word) == 0) {
opcode_obj = PyLong_FromLong(instruction_word & 0xFF);
if (!opcode_obj) {
Py_DECREF(location);
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create opcode object");
goto error;
}
} else {
// Opcode read failed - clear the exception since opcode is optional
PyErr_Clear();
}
}
PyObject *tuple = make_frame_info(unwinder, meta->file_name, location,
meta->func_name, opcode_obj ? opcode_obj : Py_None);
Py_DECREF(location);
Py_XDECREF(opcode_obj);
if (!tuple) {
goto error;
}

View file

@ -337,8 +337,9 @@ process_frame_chain(
extra_frame = &_Py_STR(native);
}
if (extra_frame) {
// Use "~" as file, None as location (synthetic frame), None as opcode
PyObject *extra_frame_info = make_frame_info(
unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame);
unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None);
if (extra_frame_info == NULL) {
return -1;
}

View file

@ -28,11 +28,28 @@ PyStructSequence_Desc TaskInfo_desc = {
4
};
// LocationInfo structseq type
static PyStructSequence_Field LocationInfo_fields[] = {
{"lineno", "Line number"},
{"end_lineno", "End line number"},
{"col_offset", "Column offset"},
{"end_col_offset", "End column offset"},
{NULL}
};
PyStructSequence_Desc LocationInfo_desc = {
"_remote_debugging.LocationInfo",
"Source location information: (lineno, end_lineno, col_offset, end_col_offset)",
LocationInfo_fields,
4
};
// FrameInfo structseq type
static PyStructSequence_Field FrameInfo_fields[] = {
{"filename", "Source code filename"},
{"lineno", "Line number"},
{"location", "LocationInfo structseq or None for synthetic frames"},
{"funcname", "Function name"},
{"opcode", "Opcode being executed (None if not gathered)"},
{NULL}
};
@ -40,7 +57,7 @@ PyStructSequence_Desc FrameInfo_desc = {
"_remote_debugging.FrameInfo",
"Information about a frame",
FrameInfo_fields,
3
4
};
// CoroInfo structseq type
@ -235,6 +252,7 @@ _remote_debugging.RemoteUnwinder.__init__
skip_non_matching_threads: bool = True
native: bool = False
gc: bool = False
opcodes: bool = False
cache_frames: bool = False
stats: bool = False
@ -255,6 +273,8 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
non-Python code.
gc: If True, include artificial "<GC>" frames to denote active garbage
collection.
opcodes: If True, gather bytecode opcode information for instruction-level
profiling.
cache_frames: If True, enable frame caching optimization to avoid re-reading
unchanged parent frames between samples.
stats: If True, collect statistics about cache hits, memory reads, etc.
@ -277,8 +297,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int mode, int debug,
int skip_non_matching_threads,
int native, int gc,
int cache_frames, int stats)
/*[clinic end generated code: output=b34ef8cce013c975 input=df2221ef114c3d6a]*/
int opcodes, int cache_frames,
int stats)
/*[clinic end generated code: output=0031f743f4b9ad52 input=8fb61b24102dec6e]*/
{
// Validate that all_threads and only_active_thread are not both True
if (all_threads && only_active_thread) {
@ -297,6 +318,7 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
self->native = native;
self->gc = gc;
self->opcodes = opcodes;
self->cache_frames = cache_frames;
self->collect_stats = stats;
self->stale_invalidation_counter = 0;
@ -978,6 +1000,14 @@ _remote_debugging_exec(PyObject *m)
return -1;
}
st->LocationInfo_Type = PyStructSequence_NewType(&LocationInfo_desc);
if (st->LocationInfo_Type == NULL) {
return -1;
}
if (PyModule_AddType(m, st->LocationInfo_Type) < 0) {
return -1;
}
st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc);
if (st->FrameInfo_Type == NULL) {
return -1;
@ -1051,6 +1081,7 @@ remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg)
RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
Py_VISIT(state->RemoteDebugging_Type);
Py_VISIT(state->TaskInfo_Type);
Py_VISIT(state->LocationInfo_Type);
Py_VISIT(state->FrameInfo_Type);
Py_VISIT(state->CoroInfo_Type);
Py_VISIT(state->ThreadInfo_Type);
@ -1065,6 +1096,7 @@ remote_debugging_clear(PyObject *mod)
RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
Py_CLEAR(state->RemoteDebugging_Type);
Py_CLEAR(state->TaskInfo_Type);
Py_CLEAR(state->LocationInfo_Type);
Py_CLEAR(state->FrameInfo_Type);
Py_CLEAR(state->CoroInfo_Type);
Py_CLEAR(state->ThreadInfo_Type);