gh-138122: Add thread status statistics to flamegraph profiler (#141900)

Co-authored-by: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com>
This commit is contained in:
Pablo Galindo Salgado 2025-11-30 01:42:39 +00:00 committed by GitHub
parent db098a475a
commit ea51e745c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 777 additions and 21 deletions

View file

@ -62,17 +62,65 @@ def __init__(self, *args, **kwargs):
self.stats = {}
self._root = {"samples": 0, "children": {}, "threads": set()}
self._total_samples = 0
self._sample_count = 0 # Track actual number of samples (not thread traces)
self._func_intern = {}
self._string_table = StringTable()
self._all_threads = set()
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None):
# Thread status statistics (similar to LiveStatsCollector)
self.thread_status_counts = {
"has_gil": 0,
"on_cpu": 0,
"gil_requested": 0,
"unknown": 0,
"total": 0,
}
self.samples_with_gc_frames = 0
# Per-thread statistics
self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}}
def collect(self, stack_frames, skip_idle=False):
    """Record thread-status statistics for one sample, then process its frames.

    The sample counter is bumped once per call. The aggregate and
    per-thread status counts returned by ``_collect_thread_status_stats``
    are folded into the running totals kept on this collector, and the
    frames are then handed to the parent ``collect`` implementation.

    Args:
        stack_frames: The sampled stack data; passed unchanged to
            ``_collect_thread_status_stats`` and to the parent ``collect``.
        skip_idle: Forwarded as-is to the parent ``collect``.
    """
    # One call == one sample, regardless of how many threads it contains.
    self._sample_count += 1

    aggregate, saw_gc_frame, by_thread = self._collect_thread_status_stats(stack_frames)

    # Fold this sample's aggregate counts into the running totals.
    totals = self.thread_status_counts
    for name in aggregate:
        totals[name] += aggregate[name]

    # Count the sample once if a GC frame was reported for it.
    if saw_gc_frame:
        self.samples_with_gc_frames += 1

    # Fold per-thread counts, creating a zeroed bucket on first sight
    # of a thread id.
    for tid, thread_counts in by_thread.items():
        bucket = self.per_thread_stats.setdefault(
            tid,
            {
                "has_gil": 0,
                "on_cpu": 0,
                "gil_requested": 0,
                "unknown": 0,
                "total": 0,
                "gc_samples": 0,
            },
        )
        for name, count in thread_counts.items():
            bucket[name] += count

    # Statistics are recorded first; frame processing is left to the parent.
    super().collect(stack_frames, skip_idle=skip_idle)
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, mode=None):
    """Set profiling statistics to include in flamegraph data.

    Replaces ``self.stats`` with a fresh dict built from the arguments.

    Args:
        sample_interval_usec: Stored under ``"sample_interval_usec"``.
        duration_sec: Stored under ``"duration_sec"``.
        sample_rate: Stored under ``"sample_rate"``.
        error_rate: Stored under ``"error_rate"``; defaults to ``None``.
        mode: Stored under ``"mode"``; defaults to ``None``.
    """
    # Exactly one "error_rate" entry: the leftover duplicate line without a
    # trailing comma made the dict literal a syntax error.
    self.stats = {
        "sample_interval_usec": sample_interval_usec,
        "duration_sec": duration_sec,
        "sample_rate": sample_rate,
        "error_rate": error_rate,
        "mode": mode,
    }
def export(self, filename):
@ -117,7 +165,6 @@ def _format_function_name(func):
return f"{funcname} ({filename}:{lineno})"
def _convert_to_flamegraph_format(self):
"""Convert aggregated trie to d3-flamegraph format with string table optimization."""
if self._total_samples == 0:
return {
"name": self._string_table.intern("No Data"),
@ -178,6 +225,29 @@ def convert_children(children, min_samples):
"strings": self._string_table.get_strings()
}
# Calculate thread status percentages for display
total_threads = max(1, self.thread_status_counts["total"])
thread_stats = {
"has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100,
"on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100,
"gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100,
"gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100,
**self.thread_status_counts
}
# Calculate per-thread statistics with percentages
per_thread_stats_with_pct = {}
total_samples_denominator = max(1, self._sample_count)
for thread_id, stats in self.per_thread_stats.items():
total = max(1, stats["total"])
per_thread_stats_with_pct[thread_id] = {
"has_gil_pct": (stats["has_gil"] / total) * 100,
"on_cpu_pct": (stats["on_cpu"] / total) * 100,
"gil_requested_pct": (stats["gil_requested"] / total) * 100,
"gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100,
**stats
}
# If we only have one root child, make it the root to avoid redundant level
if len(root_children) == 1:
main_child = root_children[0]
@ -185,7 +255,11 @@ def convert_children(children, min_samples):
old_name = self._string_table.get_string(main_child["name"])
new_name = f"Program Root: {old_name}"
main_child["name"] = self._string_table.intern(new_name)
main_child["stats"] = self.stats
main_child["stats"] = {
**self.stats,
"thread_stats": thread_stats,
"per_thread_stats": per_thread_stats_with_pct
}
main_child["threads"] = sorted(list(self._all_threads))
main_child["strings"] = self._string_table.get_strings()
return main_child
@ -194,7 +268,11 @@ def convert_children(children, min_samples):
"name": self._string_table.intern("Program Root"),
"value": total_samples,
"children": root_children,
"stats": self.stats,
"stats": {
**self.stats,
"thread_stats": thread_stats,
"per_thread_stats": per_thread_stats_with_pct
},
"threads": sorted(list(self._all_threads)),
"strings": self._string_table.get_strings()
}