GH-140643: Add <native> and <GC> frames to the sampling profiler (#141108)

- Introduce a new field in the GC state to store the frame that initiated garbage collection.
- Update RemoteUnwinder to accept options that control whether "<native>" and "<GC>" frames are included in the stack trace.
- Modify the sampling profiler to accept parameters for controlling the inclusion of native and GC frames.
- Enhance the stack collector to properly format and append these frames during profiling.
- Add tests to verify the correct behavior of the profiler with respect to native and GC frames, including options to exclude them.

Co-authored-by: Pablo Galindo Salgado <pablogsal@gmail.com>
This commit is contained in:
Brandt Bucher 2025-11-17 05:39:00 -08:00 committed by GitHub
parent 89a914c58d
commit 336366fd7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 465 additions and 86 deletions

View file

@ -265,6 +265,14 @@ Profile with real-time sampling statistics::
Sample all threads in the process instead of just the main thread Sample all threads in the process instead of just the main thread
.. option:: --native
Include artificial ``<native>`` frames to denote calls to non-Python code.
.. option:: --no-gc
Don't include artificial ``<GC>`` frames to denote active garbage collection.
.. option:: --realtime-stats .. option:: --realtime-stats
Print real-time sampling statistics during profiling Print real-time sampling statistics during profiling
@ -349,7 +357,7 @@ This section documents the programmatic interface for the :mod:`!profiling.sampl
For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information
about statistical profiling, see :ref:`statistical-profiling` about statistical profiling, see :ref:`statistical-profiling`
.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False) .. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False, native=False, gc=True)
Sample a Python process and generate profiling data. Sample a Python process and generate profiling data.
@ -367,6 +375,8 @@ about statistical profiling, see :ref:`statistical-profiling`
:param bool show_summary: Whether to show summary statistics (default: True) :param bool show_summary: Whether to show summary statistics (default: True)
:param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats') :param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats')
:param bool realtime_stats: Whether to display real-time statistics (default: False) :param bool realtime_stats: Whether to display real-time statistics (default: False)
:param bool native: Whether to include ``<native>`` frames (default: False)
:param bool gc: Whether to include ``<GC>`` frames (default: True)
:raises ValueError: If output_format is not 'pstats' or 'collapsed' :raises ValueError: If output_format is not 'pstats' or 'collapsed'

View file

@ -212,6 +212,7 @@ typedef struct _Py_DebugOffsets {
struct _gc { struct _gc {
uint64_t size; uint64_t size;
uint64_t collecting; uint64_t collecting;
uint64_t frame;
} gc; } gc;
// Generator object offset; // Generator object offset;
@ -355,6 +356,7 @@ typedef struct _Py_DebugOffsets {
.gc = { \ .gc = { \
.size = sizeof(struct _gc_runtime_state), \ .size = sizeof(struct _gc_runtime_state), \
.collecting = offsetof(struct _gc_runtime_state, collecting), \ .collecting = offsetof(struct _gc_runtime_state, collecting), \
.frame = offsetof(struct _gc_runtime_state, frame), \
}, \ }, \
.gen_object = { \ .gen_object = { \
.size = sizeof(PyGenObject), \ .size = sizeof(PyGenObject), \

View file

@ -1326,10 +1326,12 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(gc));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(native));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8));
@ -1763,6 +1765,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fullerror)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fullerror));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(gc));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug));
@ -1906,6 +1909,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(native));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested));
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name));

View file

@ -46,10 +46,12 @@ struct _Py_global_strings {
STRUCT_FOR_STR(dot_locals, ".<locals>") STRUCT_FOR_STR(dot_locals, ".<locals>")
STRUCT_FOR_STR(empty, "") STRUCT_FOR_STR(empty, "")
STRUCT_FOR_STR(format, ".format") STRUCT_FOR_STR(format, ".format")
STRUCT_FOR_STR(gc, "<GC>")
STRUCT_FOR_STR(generic_base, ".generic_base") STRUCT_FOR_STR(generic_base, ".generic_base")
STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(json_decoder, "json.decoder")
STRUCT_FOR_STR(kwdefaults, ".kwdefaults") STRUCT_FOR_STR(kwdefaults, ".kwdefaults")
STRUCT_FOR_STR(list_err, "list index out of range") STRUCT_FOR_STR(list_err, "list index out of range")
STRUCT_FOR_STR(native, "<native>")
STRUCT_FOR_STR(str_replace_inf, "1e309") STRUCT_FOR_STR(str_replace_inf, "1e309")
STRUCT_FOR_STR(type_params, ".type_params") STRUCT_FOR_STR(type_params, ".type_params")
STRUCT_FOR_STR(utf_8, "utf-8") STRUCT_FOR_STR(utf_8, "utf-8")
@ -486,6 +488,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(fullerror) STRUCT_FOR_ID(fullerror)
STRUCT_FOR_ID(func) STRUCT_FOR_ID(func)
STRUCT_FOR_ID(future) STRUCT_FOR_ID(future)
STRUCT_FOR_ID(gc)
STRUCT_FOR_ID(generation) STRUCT_FOR_ID(generation)
STRUCT_FOR_ID(get) STRUCT_FOR_ID(get)
STRUCT_FOR_ID(get_debug) STRUCT_FOR_ID(get_debug)
@ -629,6 +632,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(name_from) STRUCT_FOR_ID(name_from)
STRUCT_FOR_ID(namespace_separator) STRUCT_FOR_ID(namespace_separator)
STRUCT_FOR_ID(namespaces) STRUCT_FOR_ID(namespaces)
STRUCT_FOR_ID(native)
STRUCT_FOR_ID(ndigits) STRUCT_FOR_ID(ndigits)
STRUCT_FOR_ID(nested) STRUCT_FOR_ID(nested)
STRUCT_FOR_ID(new_file_name) STRUCT_FOR_ID(new_file_name)

View file

@ -212,6 +212,9 @@ struct _gc_runtime_state {
struct gc_generation_stats generation_stats[NUM_GENERATIONS]; struct gc_generation_stats generation_stats[NUM_GENERATIONS];
/* true if we are currently running the collector */ /* true if we are currently running the collector */
int collecting; int collecting;
// The frame that started the current collection. It might be NULL even when
// collecting (if no Python frame is running):
_PyInterpreterFrame *frame;
/* list of uncollectable objects */ /* list of uncollectable objects */
PyObject *garbage; PyObject *garbage;
/* a list of callbacks to be invoked when collection is performed */ /* a list of callbacks to be invoked when collection is performed */

View file

@ -24,7 +24,6 @@ enum _frameowner {
FRAME_OWNED_BY_GENERATOR = 1, FRAME_OWNED_BY_GENERATOR = 1,
FRAME_OWNED_BY_FRAME_OBJECT = 2, FRAME_OWNED_BY_FRAME_OBJECT = 2,
FRAME_OWNED_BY_INTERPRETER = 3, FRAME_OWNED_BY_INTERPRETER = 3,
FRAME_OWNED_BY_CSTACK = 4,
}; };
struct _PyInterpreterFrame { struct _PyInterpreterFrame {

View file

@ -1321,10 +1321,12 @@ extern "C" {
INIT_STR(dot_locals, ".<locals>"), \ INIT_STR(dot_locals, ".<locals>"), \
INIT_STR(empty, ""), \ INIT_STR(empty, ""), \
INIT_STR(format, ".format"), \ INIT_STR(format, ".format"), \
INIT_STR(gc, "<GC>"), \
INIT_STR(generic_base, ".generic_base"), \ INIT_STR(generic_base, ".generic_base"), \
INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(json_decoder, "json.decoder"), \
INIT_STR(kwdefaults, ".kwdefaults"), \ INIT_STR(kwdefaults, ".kwdefaults"), \
INIT_STR(list_err, "list index out of range"), \ INIT_STR(list_err, "list index out of range"), \
INIT_STR(native, "<native>"), \
INIT_STR(str_replace_inf, "1e309"), \ INIT_STR(str_replace_inf, "1e309"), \
INIT_STR(type_params, ".type_params"), \ INIT_STR(type_params, ".type_params"), \
INIT_STR(utf_8, "utf-8"), \ INIT_STR(utf_8, "utf-8"), \
@ -1761,6 +1763,7 @@ extern "C" {
INIT_ID(fullerror), \ INIT_ID(fullerror), \
INIT_ID(func), \ INIT_ID(func), \
INIT_ID(future), \ INIT_ID(future), \
INIT_ID(gc), \
INIT_ID(generation), \ INIT_ID(generation), \
INIT_ID(get), \ INIT_ID(get), \
INIT_ID(get_debug), \ INIT_ID(get_debug), \
@ -1904,6 +1907,7 @@ extern "C" {
INIT_ID(name_from), \ INIT_ID(name_from), \
INIT_ID(namespace_separator), \ INIT_ID(namespace_separator), \
INIT_ID(namespaces), \ INIT_ID(namespaces), \
INIT_ID(native), \
INIT_ID(ndigits), \ INIT_ID(ndigits), \
INIT_ID(nested), \ INIT_ID(nested), \
INIT_ID(new_file_name), \ INIT_ID(new_file_name), \

View file

@ -1732,6 +1732,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1); assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(gc);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(generation); string = &_Py_ID(generation);
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
@ -2304,6 +2308,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1); assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(native);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_ID(ndigits); string = &_Py_ID(ndigits);
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
@ -3236,6 +3244,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1); assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(gc);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(anon_null); string = &_Py_STR(anon_null);
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
@ -3260,6 +3272,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) {
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1); assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(native);
_PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1));
assert(PyUnicode_GET_LENGTH(string) != 1);
string = &_Py_STR(anon_setcomp); string = &_Py_STR(anon_setcomp);
_PyUnicode_InternStatic(interp, &string); _PyUnicode_InternStatic(interp, &string);
assert(_PyUnicode_CheckConsistency(string, 1)); assert(_PyUnicode_CheckConsistency(string, 1));

View file

@ -151,17 +151,22 @@ function createPythonTooltip(data) {
const funcname = resolveString(d.data.funcname) || resolveString(d.data.name); const funcname = resolveString(d.data.funcname) || resolveString(d.data.name);
const filename = resolveString(d.data.filename) || ""; const filename = resolveString(d.data.filename) || "";
// Don't show file location for special frames like <GC> and <native>
const isSpecialFrame = filename === "~";
const fileLocationHTML = isSpecialFrame ? "" : `
<div style="color: #5a6c7d; font-size: 13px; margin-bottom: 12px;
font-family: monospace; background: #f8f9fa;
padding: 4px 8px; border-radius: 4px; word-break: break-all; overflow-wrap: break-word;">
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
</div>`;
const tooltipHTML = ` const tooltipHTML = `
<div> <div>
<div style="color: #3776ab; font-weight: 600; font-size: 16px; <div style="color: #3776ab; font-weight: 600; font-size: 16px;
margin-bottom: 8px; line-height: 1.3; word-break: break-word; overflow-wrap: break-word;"> margin-bottom: 8px; line-height: 1.3; word-break: break-word; overflow-wrap: break-word;">
${funcname} ${funcname}
</div> </div>
<div style="color: #5a6c7d; font-size: 13px; margin-bottom: 12px; ${fileLocationHTML}
font-family: monospace; background: #f8f9fa;
padding: 4px 8px; border-radius: 4px; word-break: break-all; overflow-wrap: break-word;">
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
</div>
<div style="display: grid; grid-template-columns: auto 1fr; <div style="display: grid; grid-template-columns: auto 1fr;
gap: 8px 16px; font-size: 14px;"> gap: 8px 16px; font-size: 14px;">
<span style="color: #5a6c7d; font-weight: 500;">Execution Time:</span> <span style="color: #5a6c7d; font-weight: 500;">Execution Time:</span>
@ -474,14 +479,23 @@ function populateStats(data) {
if (i < hotSpots.length && hotSpots[i]) { if (i < hotSpots.length && hotSpots[i]) {
const hotspot = hotSpots[i]; const hotspot = hotSpots[i];
const filename = hotspot.filename || 'unknown'; const filename = hotspot.filename || 'unknown';
const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown';
const lineno = hotspot.lineno ?? '?'; const lineno = hotspot.lineno ?? '?';
let funcDisplay = hotspot.funcname || 'unknown'; let funcDisplay = hotspot.funcname || 'unknown';
if (funcDisplay.length > 35) { if (funcDisplay.length > 35) {
funcDisplay = funcDisplay.substring(0, 32) + '...'; funcDisplay = funcDisplay.substring(0, 32) + '...';
} }
document.getElementById(`hotspot-file-${num}`).textContent = `${basename}:${lineno}`; // Don't show file:line for special frames like <GC> and <native>
const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?');
let fileDisplay;
if (isSpecialFrame) {
fileDisplay = '--';
} else {
const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown';
fileDisplay = `${basename}:${lineno}`;
}
document.getElementById(`hotspot-file-${num}`).textContent = fileDisplay;
document.getElementById(`hotspot-func-${num}`).textContent = funcDisplay; document.getElementById(`hotspot-func-${num}`).textContent = funcDisplay;
document.getElementById(`hotspot-detail-${num}`).textContent = `${hotspot.directPercent.toFixed(1)}% samples (${hotspot.directSamples.toLocaleString()})`; document.getElementById(`hotspot-detail-${num}`).textContent = `${hotspot.directPercent.toFixed(1)}% samples (${hotspot.directSamples.toLocaleString()})`;
} else { } else {

View file

@ -137,19 +137,19 @@ def _run_with_sync(original_cmd):
class SampleProfiler: class SampleProfiler:
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True): def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True):
self.pid = pid self.pid = pid
self.sample_interval_usec = sample_interval_usec self.sample_interval_usec = sample_interval_usec
self.all_threads = all_threads self.all_threads = all_threads
if _FREE_THREADED_BUILD: if _FREE_THREADED_BUILD:
self.unwinder = _remote_debugging.RemoteUnwinder( self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, all_threads=self.all_threads, mode=mode, self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads skip_non_matching_threads=skip_non_matching_threads
) )
else: else:
only_active_threads = bool(self.all_threads) only_active_threads = bool(self.all_threads)
self.unwinder = _remote_debugging.RemoteUnwinder( self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, only_active_thread=only_active_threads, mode=mode, self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads skip_non_matching_threads=skip_non_matching_threads
) )
# Track sample intervals and total sample count # Track sample intervals and total sample count
@ -616,6 +616,8 @@ def sample(
output_format="pstats", output_format="pstats",
realtime_stats=False, realtime_stats=False,
mode=PROFILING_MODE_WALL, mode=PROFILING_MODE_WALL,
native=False,
gc=True,
): ):
# PROFILING_MODE_ALL implies no skipping at all # PROFILING_MODE_ALL implies no skipping at all
if mode == PROFILING_MODE_ALL: if mode == PROFILING_MODE_ALL:
@ -627,7 +629,7 @@ def sample(
skip_idle = mode != PROFILING_MODE_WALL skip_idle = mode != PROFILING_MODE_WALL
profiler = SampleProfiler( profiler = SampleProfiler(
pid, sample_interval_usec, all_threads=all_threads, mode=mode, pid, sample_interval_usec, all_threads=all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads skip_non_matching_threads=skip_non_matching_threads
) )
profiler.realtime_stats = realtime_stats profiler.realtime_stats = realtime_stats
@ -717,6 +719,8 @@ def wait_for_process_and_sample(pid, sort_value, args):
output_format=args.format, output_format=args.format,
realtime_stats=args.realtime_stats, realtime_stats=args.realtime_stats,
mode=mode, mode=mode,
native=args.native,
gc=args.gc,
) )
@ -767,9 +771,19 @@ def main():
sampling_group.add_argument( sampling_group.add_argument(
"--realtime-stats", "--realtime-stats",
action="store_true", action="store_true",
default=False,
help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling",
) )
sampling_group.add_argument(
"--native",
action="store_true",
help="Include artificial \"<native>\" frames to denote calls to non-Python code.",
)
sampling_group.add_argument(
"--no-gc",
action="store_false",
dest="gc",
help="Don't include artificial \"<GC>\" frames to denote active garbage collection.",
)
# Mode options # Mode options
mode_group = parser.add_argument_group("Mode options") mode_group = parser.add_argument_group("Mode options")
@ -934,6 +948,8 @@ def main():
output_format=args.format, output_format=args.format,
realtime_stats=args.realtime_stats, realtime_stats=args.realtime_stats,
mode=mode, mode=mode,
native=args.native,
gc=args.gc,
) )
elif args.module or args.args: elif args.module or args.args:
if args.module: if args.module:

View file

@ -36,10 +36,16 @@ def process_frames(self, frames, thread_id):
def export(self, filename): def export(self, filename):
lines = [] lines = []
for (call_tree, thread_id), count in self.stack_counter.items(): for (call_tree, thread_id), count in self.stack_counter.items():
stack_str = ";".join( parts = [f"tid:{thread_id}"]
f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree for file, line, func in call_tree:
) # This is what pstats does for "special" frames:
lines.append((f"tid:{thread_id};{stack_str}", count)) if file == "~" and line == 0:
part = func
else:
part = f"{os.path.basename(file)}:{func}:{line}"
parts.append(part)
stack_str = ";".join(parts)
lines.append((stack_str, count))
lines.sort(key=lambda x: (-x[1], x[0])) lines.sort(key=lambda x: (-x[1], x[0]))
@ -98,6 +104,10 @@ def export(self, filename):
def _format_function_name(func): def _format_function_name(func):
filename, lineno, funcname = func filename, lineno, funcname = func
# Special frames like <GC> and <native> should not show file:line
if filename == "~" and lineno == 0:
return funcname
if len(filename) > 50: if len(filename) > 50:
parts = filename.split("/") parts = filename.split("/")
if len(parts) > 2: if len(parts) > 2:

View file

@ -159,6 +159,8 @@ def foo():
FrameInfo([script_name, 12, "baz"]), FrameInfo([script_name, 12, "baz"]),
FrameInfo([script_name, 9, "bar"]), FrameInfo([script_name, 9, "bar"]),
FrameInfo([threading.__file__, ANY, "Thread.run"]), FrameInfo([threading.__file__, ANY, "Thread.run"]),
FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]),
FrameInfo([threading.__file__, ANY, "Thread._bootstrap"]),
] ]
# Is possible that there are more threads, so we check that the # Is possible that there are more threads, so we check that the
# expected stack traces are in the result (looking at you Windows!) # expected stack traces are in the result (looking at you Windows!)

View file

@ -2025,7 +2025,6 @@ def test_sample_target_script(self):
# Should see some of our test functions # Should see some of our test functions
self.assertIn("slow_fibonacci", output) self.assertIn("slow_fibonacci", output)
def test_sample_target_module(self): def test_sample_target_module(self):
tempdir = tempfile.TemporaryDirectory(delete=False) tempdir = tempfile.TemporaryDirectory(delete=False)
self.addCleanup(lambda x: shutil.rmtree(x), tempdir.name) self.addCleanup(lambda x: shutil.rmtree(x), tempdir.name)
@ -2264,7 +2263,9 @@ def test_cli_module_argument_parsing(self):
show_summary=True, show_summary=True,
output_format="pstats", output_format="pstats",
realtime_stats=False, realtime_stats=False,
mode=0 mode=0,
native=False,
gc=True,
) )
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
@ -2292,7 +2293,9 @@ def test_cli_module_with_arguments(self):
show_summary=True, show_summary=True,
output_format="pstats", output_format="pstats",
realtime_stats=False, realtime_stats=False,
mode=0 mode=0,
native=False,
gc=True,
) )
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
@ -2320,7 +2323,9 @@ def test_cli_script_argument_parsing(self):
show_summary=True, show_summary=True,
output_format="pstats", output_format="pstats",
realtime_stats=False, realtime_stats=False,
mode=0 mode=0,
native=False,
gc=True,
) )
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
@ -2420,7 +2425,9 @@ def test_cli_module_with_profiler_options(self):
show_summary=True, show_summary=True,
output_format="pstats", output_format="pstats",
realtime_stats=False, realtime_stats=False,
mode=0 mode=0,
native=False,
gc=True,
) )
@unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist")
@ -2454,7 +2461,9 @@ def test_cli_script_with_profiler_options(self):
show_summary=True, show_summary=True,
output_format="collapsed", output_format="collapsed",
realtime_stats=False, realtime_stats=False,
mode=0 mode=0,
native=False,
gc=True,
) )
def test_cli_empty_module_name(self): def test_cli_empty_module_name(self):
@ -2666,7 +2675,9 @@ def test_argument_parsing_basic(self):
show_summary=True, show_summary=True,
output_format="pstats", output_format="pstats",
realtime_stats=False, realtime_stats=False,
mode=0 mode=0,
native=False,
gc=True,
) )
def test_sort_options(self): def test_sort_options(self):
@ -3121,6 +3132,187 @@ def test_parse_mode_function(self):
@requires_subprocess() @requires_subprocess()
@skip_if_not_supported @skip_if_not_supported
class TestGCFrameTracking(unittest.TestCase):
"""Tests for GC frame tracking in the sampling profiler."""
@classmethod
def setUpClass(cls):
"""Create a static test script with GC frames and CPU-intensive work."""
# The script below never terminates on its own: main_loop() keeps creating
# self-referencing ExpensiveGarbage objects and calling gc.collect(), and
# each object's __del__ runs a 100000-iteration arithmetic loop.
cls.gc_test_script = '''
import gc
class ExpensiveGarbage:
"""Class that triggers GC with expensive finalizer (callback)."""
def __init__(self):
self.cycle = self
def __del__(self):
# CPU-intensive work in the finalizer callback
result = 0
for i in range(100000):
result += i * i
if i % 1000 == 0:
result = result % 1000000
def main_loop():
"""Main loop that triggers GC with expensive callback."""
while True:
ExpensiveGarbage()
gc.collect()
if __name__ == "__main__":
main_loop()
'''
def test_gc_frames_enabled(self):
"""Test that GC frames appear when gc tracking is enabled."""
# sys.stdout is swapped for an in-memory buffer so that anything printed
# while sampling can be inspected after the call.
with (
test_subprocess(self.gc_test_script) as subproc,
io.StringIO() as captured_output,
mock.patch("sys.stdout", captured_output),
):
try:
profiling.sampling.sample.sample(
subproc.process.pid,
duration_sec=1,
sample_interval_usec=5000,
show_summary=False,
native=False,
gc=True,
)
# A PermissionError from sample() is reported as a skipped test rather
# than a failure.
except PermissionError:
self.skipTest("Insufficient permissions for remote profiling")
output = captured_output.getvalue()
# Should capture samples
self.assertIn("Captured", output)
self.assertIn("samples", output)
# GC frames should be present
self.assertIn("<GC>", output)
def test_gc_frames_disabled(self):
"""Test that GC frames do not appear when gc tracking is disabled."""
with (
test_subprocess(self.gc_test_script) as subproc,
io.StringIO() as captured_output,
mock.patch("sys.stdout", captured_output),
):
try:
# Same call as in test_gc_frames_enabled except for gc=False.
profiling.sampling.sample.sample(
subproc.process.pid,
duration_sec=1,
sample_interval_usec=5000,
show_summary=False,
native=False,
gc=False,
)
except PermissionError:
self.skipTest("Insufficient permissions for remote profiling")
output = captured_output.getvalue()
# Should capture samples
self.assertIn("Captured", output)
self.assertIn("samples", output)
# GC frames should NOT be present
self.assertNotIn("<GC>", output)
@requires_subprocess()
@skip_if_not_supported
class TestNativeFrameTracking(unittest.TestCase):
"""Tests for native frame tracking in the sampling profiler."""
@classmethod
def setUpClass(cls):
"""Create a static test script with native frames and CPU-intensive work."""
# The script below loops forever: main_loop() reaches inner() through
# operator.call(), which the script's own comments treat as the native
# call in the middle of the stack, and inner() spins in a Python loop.
# NOTE(review): 1_000_0000 is 10,000,000 despite the unusual digit grouping.
cls.native_test_script = '''
import operator
def main_loop():
while True:
# Native code in the middle of the stack:
operator.call(inner)
def inner():
# Python code at the top of the stack:
for _ in range(1_000_0000):
pass
if __name__ == "__main__":
main_loop()
'''
def test_native_frames_enabled(self):
"""Test that native frames appear when native tracking is enabled."""
# delete=False keeps the temporary file on disk after it is closed, so it
# can be reopened by name below; removal is left to the close_and_unlink
# cleanup (helper defined elsewhere -- presumably closes and deletes it).
collapsed_file = tempfile.NamedTemporaryFile(
suffix=".txt", delete=False
)
self.addCleanup(close_and_unlink, collapsed_file)
with (
test_subprocess(self.native_test_script) as subproc,
):
# Suppress profiler output when testing file export
with (
io.StringIO() as captured_output,
mock.patch("sys.stdout", captured_output),
):
try:
profiling.sampling.sample.sample(
subproc.process.pid,
duration_sec=1,
filename=collapsed_file.name,
output_format="collapsed",
sample_interval_usec=1000,
native=True,
)
except PermissionError:
self.skipTest("Insufficient permissions for remote profiling")
# Verify file was created and contains valid data
self.assertTrue(os.path.exists(collapsed_file.name))
self.assertGreater(os.path.getsize(collapsed_file.name), 0)
# Check file format
with open(collapsed_file.name, "r") as f:
content = f.read()
lines = content.strip().split("\n")
self.assertGreater(len(lines), 0)
# Keep everything before the last space on each line, i.e. the
# ";"-separated stack (presumably the dropped field is the sample count --
# confirm against the collapsed-stack exporter).
stacks = [line.rsplit(" ", 1)[0] for line in lines]
# Most samples should have native code in the middle of the stack:
self.assertTrue(any(";<native>;" in stack for stack in stacks))
# No samples should have native code at the top of the stack:
self.assertFalse(any(stack.endswith(";<native>") for stack in stacks))
def test_native_frames_disabled(self):
"""Test that native frames do not appear when native tracking is disabled."""
with (
test_subprocess(self.native_test_script) as subproc,
io.StringIO() as captured_output,
mock.patch("sys.stdout", captured_output),
):
try:
# No native= argument is passed, so this exercises sample()'s default
# for that option.
profiling.sampling.sample.sample(
subproc.process.pid,
duration_sec=1,
sample_interval_usec=5000,
show_summary=False,
)
except PermissionError:
self.skipTest("Insufficient permissions for remote profiling")
output = captured_output.getvalue()
# Native frames should NOT be present:
self.assertNotIn("<native>", output)
class TestProcessPoolExecutorSupport(unittest.TestCase): class TestProcessPoolExecutorSupport(unittest.TestCase):
""" """
Test that ProcessPoolExecutor works correctly with profiling.sampling. Test that ProcessPoolExecutor works correctly with profiling.sampling.
@ -3161,7 +3353,5 @@ def worker(x):
self.assertIn("Results: [2, 4, 6]", stdout) self.assertIn("Results: [2, 4, 6]", stdout)
self.assertNotIn("Can't pickle", stderr) self.assertNotIn("Can't pickle", stderr)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View file

@ -0,0 +1,3 @@
Add support for ``<GC>`` and ``<native>`` frames to
:mod:`!profiling.sampling` output to denote active garbage collection and
calls to native code.

View file

@ -26,8 +26,9 @@
#include "Python.h" #include "Python.h"
#include <internal/pycore_debug_offsets.h> // _Py_DebugOffsets #include <internal/pycore_debug_offsets.h> // _Py_DebugOffsets
#include <internal/pycore_frame.h> // FRAME_SUSPENDED_YIELD_FROM #include <internal/pycore_frame.h> // FRAME_SUSPENDED_YIELD_FROM
#include <internal/pycore_interpframe.h> // FRAME_OWNED_BY_CSTACK #include <internal/pycore_interpframe.h> // FRAME_OWNED_BY_INTERPRETER
#include <internal/pycore_llist.h> // struct llist_node #include <internal/pycore_llist.h> // struct llist_node
#include <internal/pycore_long.h> // _PyLong_GetZero
#include <internal/pycore_stackref.h> // Py_TAG_BITS #include <internal/pycore_stackref.h> // Py_TAG_BITS
#include "../Python/remote_debug.h" #include "../Python/remote_debug.h"
@ -92,14 +93,16 @@ typedef enum _WIN32_THREADSTATE {
#endif #endif
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ #define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \
offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *))
#else #else
#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ #define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \
offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \
offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \
offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *))
#endif #endif
#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) #define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256)
@ -276,6 +279,8 @@ typedef struct {
int only_active_thread; int only_active_thread;
int mode; // Use enum _ProfilingMode values int mode; // Use enum _ProfilingMode values
int skip_non_matching_threads; // New option to skip threads that don't match mode int skip_non_matching_threads; // New option to skip threads that don't match mode
int native;
int gc;
RemoteDebuggingState *cached_state; // Cached module state RemoteDebuggingState *cached_state; // Cached module state
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
// TLBC cache invalidation tracking // TLBC cache invalidation tracking
@ -1812,6 +1817,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L
* CODE OBJECT AND FRAME PARSING FUNCTIONS * CODE OBJECT AND FRAME PARSING FUNCTIONS
* ============================================================================ */ * ============================================================================ */
/* Build a FrameInfo struct sequence holding (file, line, func).
 *
 * A new reference to each of the three arguments is taken before it is
 * stored, so the caller keeps ownership of the references it passed in.
 * Returns the new FrameInfo object, or NULL after reporting a MemoryError
 * through set_exception_cause() if the struct sequence cannot be created.
 */
static PyObject *
make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line,
                PyObject *func)
{
    RemoteDebuggingState *module_state =
        RemoteDebugging_GetStateFromObject((PyObject*)unwinder);
    PyObject *frame_info = PyStructSequence_New(module_state->FrameInfo_Type);
    if (!frame_info) {
        set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo");
        return NULL;
    }
    // Field order matches the FrameInfo layout: 0 = file, 1 = line, 2 = func.
    PyObject *fields[] = {file, line, func};
    for (Py_ssize_t i = 0; i < 3; i++) {
        // PyStructSequence_SetItem() takes over the reference it is given,
        // so hand it a fresh one:
        Py_INCREF(fields[i]);
        PyStructSequence_SetItem(frame_info, i, fields[i]);
    }
    return frame_info;
}
static int static int
parse_code_object(RemoteUnwinderObject *unwinder, parse_code_object(RemoteUnwinderObject *unwinder,
PyObject **result, PyObject **result,
@ -1825,8 +1849,6 @@ parse_code_object(RemoteUnwinderObject *unwinder,
PyObject *func = NULL; PyObject *func = NULL;
PyObject *file = NULL; PyObject *file = NULL;
PyObject *linetable = NULL; PyObject *linetable = NULL;
PyObject *lineno = NULL;
PyObject *tuple = NULL;
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
// In free threading builds, code object addresses might have the low bit set // In free threading builds, code object addresses might have the low bit set
@ -1948,25 +1970,18 @@ parse_code_object(RemoteUnwinderObject *unwinder,
info.lineno = -1; info.lineno = -1;
} }
lineno = PyLong_FromLong(info.lineno); PyObject *lineno = PyLong_FromLong(info.lineno);
if (!lineno) { if (!lineno) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object");
goto error; goto error;
} }
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name);
tuple = PyStructSequence_New(state->FrameInfo_Type); Py_DECREF(lineno);
if (!tuple) { if (!tuple) {
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo for code object");
goto error; goto error;
} }
Py_INCREF(meta->func_name);
Py_INCREF(meta->file_name);
PyStructSequence_SetItem(tuple, 0, meta->file_name);
PyStructSequence_SetItem(tuple, 1, lineno);
PyStructSequence_SetItem(tuple, 2, meta->func_name);
*result = tuple; *result = tuple;
return 0; return 0;
@ -1974,8 +1989,6 @@ parse_code_object(RemoteUnwinderObject *unwinder,
Py_XDECREF(func); Py_XDECREF(func);
Py_XDECREF(file); Py_XDECREF(file);
Py_XDECREF(linetable); Py_XDECREF(linetable);
Py_XDECREF(lineno);
Py_XDECREF(tuple);
return -1; return -1;
} }
@ -2116,6 +2129,7 @@ parse_frame_from_chunks(
PyObject **result, PyObject **result,
uintptr_t address, uintptr_t address,
uintptr_t *previous_frame, uintptr_t *previous_frame,
uintptr_t *stackpointer,
StackChunkList *chunks StackChunkList *chunks
) { ) {
void *frame_ptr = find_frame_in_chunks(chunks, address); void *frame_ptr = find_frame_in_chunks(chunks, address);
@ -2126,6 +2140,7 @@ parse_frame_from_chunks(
char *frame = (char *)frame_ptr; char *frame = (char *)frame_ptr;
*previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
*stackpointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.stackpointer);
uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable); uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable);
int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
if (frame_valid != 1) { if (frame_valid != 1) {
@ -2238,8 +2253,7 @@ is_frame_valid(
void* frame = (void*)frame_addr; void* frame = (void*)frame_addr;
if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) {
return 0; // C frame return 0; // C frame
} }
@ -2458,8 +2472,9 @@ process_frame_chain(
RemoteUnwinderObject *unwinder, RemoteUnwinderObject *unwinder,
uintptr_t initial_frame_addr, uintptr_t initial_frame_addr,
StackChunkList *chunks, StackChunkList *chunks,
PyObject *frame_info PyObject *frame_info,
) { uintptr_t gc_frame)
{
uintptr_t frame_addr = initial_frame_addr; uintptr_t frame_addr = initial_frame_addr;
uintptr_t prev_frame_addr = 0; uintptr_t prev_frame_addr = 0;
const size_t MAX_FRAMES = 1024; const size_t MAX_FRAMES = 1024;
@ -2468,6 +2483,7 @@ process_frame_chain(
while ((void*)frame_addr != NULL) { while ((void*)frame_addr != NULL) {
PyObject *frame = NULL; PyObject *frame = NULL;
uintptr_t next_frame_addr = 0; uintptr_t next_frame_addr = 0;
uintptr_t stackpointer = 0;
if (++frame_count > MAX_FRAMES) { if (++frame_count > MAX_FRAMES) {
PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
@ -2476,7 +2492,7 @@ process_frame_chain(
} }
// Try chunks first, fallback to direct memory read // Try chunks first, fallback to direct memory read
if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
PyErr_Clear(); PyErr_Clear();
uintptr_t address_of_code_object = 0; uintptr_t address_of_code_object = 0;
if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) { if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object ,&next_frame_addr) < 0) {
@ -2484,26 +2500,63 @@ process_frame_chain(
return -1; return -1;
} }
} }
if (frame == NULL && PyList_GET_SIZE(frame_info) == 0) {
if (!frame) { // If the first frame is missing, the chain is broken:
break; const char *e = "Failed to parse initial frame in chain";
} PyErr_SetString(PyExc_RuntimeError, e);
if (prev_frame_addr && frame_addr != prev_frame_addr) {
PyErr_Format(PyExc_RuntimeError,
"Broken frame chain: expected frame at 0x%lx, got 0x%lx",
prev_frame_addr, frame_addr);
Py_DECREF(frame);
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed");
return -1; return -1;
} }
PyObject *extra_frame = NULL;
if (PyList_Append(frame_info, frame) == -1) { // This frame kicked off the current GC collection:
Py_DECREF(frame); if (unwinder->gc && frame_addr == gc_frame) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to frame info list"); _Py_DECLARE_STR(gc, "<GC>");
return -1; extra_frame = &_Py_STR(gc);
}
// Otherwise, check for native frames to insert:
else if (unwinder->native &&
// We've reached an interpreter trampoline frame:
frame == NULL &&
// Bottommost frame is always native, so skip that one:
next_frame_addr &&
// Only suppress native frames if GC tracking is enabled and the next frame will be a GC frame:
!(unwinder->gc && next_frame_addr == gc_frame))
{
_Py_DECLARE_STR(native, "<native>");
extra_frame = &_Py_STR(native);
}
if (extra_frame) {
// Use "~" as file and 0 as line, since that's what pstats uses:
PyObject *extra_frame_info = make_frame_info(
unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame);
if (extra_frame_info == NULL) {
return -1;
}
int error = PyList_Append(frame_info, extra_frame_info);
Py_DECREF(extra_frame_info);
if (error) {
const char *e = "Failed to append extra frame to frame info list";
set_exception_cause(unwinder, PyExc_RuntimeError, e);
return -1;
}
}
if (frame) {
if (prev_frame_addr && frame_addr != prev_frame_addr) {
const char *f = "Broken frame chain: expected frame at 0x%lx, got 0x%lx";
PyErr_Format(PyExc_RuntimeError, f, prev_frame_addr, frame_addr);
Py_DECREF(frame);
const char *e = "Frame chain consistency check failed";
set_exception_cause(unwinder, PyExc_RuntimeError, e);
return -1;
}
if (PyList_Append(frame_info, frame) == -1) {
Py_DECREF(frame);
const char *e = "Failed to append frame to frame info list";
set_exception_cause(unwinder, PyExc_RuntimeError, e);
return -1;
}
Py_DECREF(frame);
} }
Py_DECREF(frame);
prev_frame_addr = next_frame_addr; prev_frame_addr = next_frame_addr;
frame_addr = next_frame_addr; frame_addr = next_frame_addr;
@ -2644,7 +2697,8 @@ static PyObject*
unwind_stack_for_thread( unwind_stack_for_thread(
RemoteUnwinderObject *unwinder, RemoteUnwinderObject *unwinder,
uintptr_t *current_tstate, uintptr_t *current_tstate,
uintptr_t gil_holder_tstate uintptr_t gil_holder_tstate,
uintptr_t gc_frame
) { ) {
PyObject *frame_info = NULL; PyObject *frame_info = NULL;
PyObject *thread_id = NULL; PyObject *thread_id = NULL;
@ -2746,7 +2800,7 @@ unwind_stack_for_thread(
goto error; goto error;
} }
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
goto error; goto error;
} }
@ -2818,6 +2872,8 @@ _remote_debugging.RemoteUnwinder.__init__
mode: int = 0 mode: int = 0
debug: bool = False debug: bool = False
skip_non_matching_threads: bool = True skip_non_matching_threads: bool = True
native: bool = False
gc: bool = False
Initialize a new RemoteUnwinder object for debugging a remote Python process. Initialize a new RemoteUnwinder object for debugging a remote Python process.
@ -2832,6 +2888,10 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process.
lead to the exception. lead to the exception.
skip_non_matching_threads: If True, skip threads that don't match the selected mode. skip_non_matching_threads: If True, skip threads that don't match the selected mode.
If False, include all threads regardless of mode. If False, include all threads regardless of mode.
native: If True, include artificial "<native>" frames to denote calls to
non-Python code.
gc: If True, include artificial "<GC>" frames to denote active garbage
collection.
The RemoteUnwinder provides functionality to inspect and debug a running Python The RemoteUnwinder provides functionality to inspect and debug a running Python
process, including examining thread states, stack frames and other runtime data. process, including examining thread states, stack frames and other runtime data.
@ -2848,8 +2908,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int pid, int all_threads, int pid, int all_threads,
int only_active_thread, int only_active_thread,
int mode, int debug, int mode, int debug,
int skip_non_matching_threads) int skip_non_matching_threads,
/*[clinic end generated code: output=abf5ea5cd58bcb36 input=08fb6ace023ec3b5]*/ int native, int gc)
/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/
{ {
// Validate that all_threads and only_active_thread are not both True // Validate that all_threads and only_active_thread are not both True
if (all_threads && only_active_thread) { if (all_threads && only_active_thread) {
@ -2866,6 +2927,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
} }
#endif #endif
self->native = native;
self->gc = gc;
self->debug = debug; self->debug = debug;
self->only_active_thread = only_active_thread; self->only_active_thread = only_active_thread;
self->mode = mode; self->mode = mode;
@ -3026,6 +3089,13 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
goto exit; goto exit;
} }
uintptr_t gc_frame = 0;
if (self->gc) {
gc_frame = GET_MEMBER(uintptr_t, interp_state_buffer,
self->debug_offsets.interpreter_state.gc
+ self->debug_offsets.gc.frame);
}
int64_t interpreter_id = GET_MEMBER(int64_t, interp_state_buffer, int64_t interpreter_id = GET_MEMBER(int64_t, interp_state_buffer,
self->debug_offsets.interpreter_state.id); self->debug_offsets.interpreter_state.id);
@ -3085,7 +3155,9 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
} }
while (current_tstate != 0) { while (current_tstate != 0) {
PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate, gil_holder_tstate); PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate,
gil_holder_tstate,
gc_frame);
if (!frame_info) { if (!frame_info) {
// Check if this was an intentional skip due to mode-based filtering // Check if this was an intentional skip due to mode-based filtering
if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) { if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) {

View file

@ -11,7 +11,8 @@ preserve
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n"
" mode=0, debug=False, skip_non_matching_threads=True)\n" " mode=0, debug=False, skip_non_matching_threads=True,\n"
" native=False, gc=False)\n"
"--\n" "--\n"
"\n" "\n"
"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n"
@ -27,6 +28,10 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__,
" lead to the exception.\n" " lead to the exception.\n"
" skip_non_matching_threads: If True, skip threads that don\'t match the selected mode.\n" " skip_non_matching_threads: If True, skip threads that don\'t match the selected mode.\n"
" If False, include all threads regardless of mode.\n" " If False, include all threads regardless of mode.\n"
" native: If True, include artificial \"<native>\" frames to denote calls to\n"
" non-Python code.\n"
" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n"
" collection.\n"
"\n" "\n"
"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n"
"process, including examining thread states, stack frames and other runtime data.\n" "process, including examining thread states, stack frames and other runtime data.\n"
@ -42,7 +47,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
int pid, int all_threads, int pid, int all_threads,
int only_active_thread, int only_active_thread,
int mode, int debug, int mode, int debug,
int skip_non_matching_threads); int skip_non_matching_threads,
int native, int gc);
static int static int
_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs)
@ -50,7 +56,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int return_value = -1; int return_value = -1;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 6 #define NUM_KEYWORDS 8
static struct { static struct {
PyGC_Head _this_is_not_used; PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD PyObject_VAR_HEAD
@ -59,7 +65,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
} _kwtuple = { } _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1, .ob_hash = -1,
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), }, .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), },
}; };
#undef NUM_KEYWORDS #undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base) #define KWTUPLE (&_kwtuple.ob_base.ob_base)
@ -68,14 +74,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
# define KWTUPLE NULL # define KWTUPLE NULL
#endif // !Py_BUILD_CORE #endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", NULL}; static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL};
static _PyArg_Parser _parser = { static _PyArg_Parser _parser = {
.keywords = _keywords, .keywords = _keywords,
.fname = "RemoteUnwinder", .fname = "RemoteUnwinder",
.kwtuple = KWTUPLE, .kwtuple = KWTUPLE,
}; };
#undef KWTUPLE #undef KWTUPLE
PyObject *argsbuf[6]; PyObject *argsbuf[8];
PyObject * const *fastargs; PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
@ -85,6 +91,8 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
int mode = 0; int mode = 0;
int debug = 0; int debug = 0;
int skip_non_matching_threads = 1; int skip_non_matching_threads = 1;
int native = 0;
int gc = 0;
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
/*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
@ -134,12 +142,30 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje
goto skip_optional_kwonly; goto skip_optional_kwonly;
} }
} }
skip_non_matching_threads = PyObject_IsTrue(fastargs[5]); if (fastargs[5]) {
if (skip_non_matching_threads < 0) { skip_non_matching_threads = PyObject_IsTrue(fastargs[5]);
if (skip_non_matching_threads < 0) {
goto exit;
}
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
if (fastargs[6]) {
native = PyObject_IsTrue(fastargs[6]);
if (native < 0) {
goto exit;
}
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
gc = PyObject_IsTrue(fastargs[7]);
if (gc < 0) {
goto exit; goto exit;
} }
skip_optional_kwonly: skip_optional_kwonly:
return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads); return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc);
exit: exit:
return return_value; return return_value;
@ -321,4 +347,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject
return return_value; return return_value;
} }
/*[clinic end generated code: output=2caefeddf7683d32 input=a9049054013a1b77]*/ /*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/

View file

@ -2074,6 +2074,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
// Don't start a garbage collection if one is already in progress. // Don't start a garbage collection if one is already in progress.
return 0; return 0;
} }
gcstate->frame = tstate->current_frame;
struct gc_collection_stats stats = { 0 }; struct gc_collection_stats stats = { 0 };
if (reason != _Py_GC_REASON_SHUTDOWN) { if (reason != _Py_GC_REASON_SHUTDOWN) {
@ -2119,6 +2120,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
} }
#endif #endif
validate_spaces(gcstate); validate_spaces(gcstate);
gcstate->frame = NULL;
_Py_atomic_store_int(&gcstate->collecting, 0); _Py_atomic_store_int(&gcstate->collecting, 0);
if (gcstate->debug & _PyGC_DEBUG_STATS) { if (gcstate->debug & _PyGC_DEBUG_STATS) {

View file

@ -2359,6 +2359,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
_Py_atomic_store_int(&gcstate->collecting, 0); _Py_atomic_store_int(&gcstate->collecting, 0);
return 0; return 0;
} }
gcstate->frame = tstate->current_frame;
assert(generation >= 0 && generation < NUM_GENERATIONS); assert(generation >= 0 && generation < NUM_GENERATIONS);
@ -2447,6 +2448,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
} }
assert(!_PyErr_Occurred(tstate)); assert(!_PyErr_Occurred(tstate));
gcstate->frame = NULL;
_Py_atomic_store_int(&gcstate->collecting, 0); _Py_atomic_store_int(&gcstate->collecting, 0);
return n + m; return n + m;
} }