gh-138122: Add thread status statistics to flamegraph profiler (#141900)

Co-authored-by: ivonastojanovic <80911834+ivonastojanovic@users.noreply.github.com>
2025-12-08 06:10:17 +00:00 · 2025-11-30 01:42:39 +00:00 · 2025-11-30 01:42:39 +00:00 · ea51e745c7
commit ea51e745c7
parent db098a475a
8 changed files with 777 additions and 21 deletions
--- a/Lib/profiling/sampling/stack_collector.py
+++ b/Lib/profiling/sampling/stack_collector.py
@ -62,17 +62,65 @@ def __init__(self, *args, **kwargs):
        self.stats = {}
        self._root = {"samples": 0, "children": {}, "threads": set()}
        self._total_samples = 0
+        self._sample_count = 0  # Track actual number of samples (not thread traces)
        self._func_intern = {}
        self._string_table = StringTable()
        self._all_threads = set()

-    def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None):
+        # Thread status statistics (similar to LiveStatsCollector)
+        self.thread_status_counts = {
+            "has_gil": 0,
+            "on_cpu": 0,
+            "gil_requested": 0,
+            "unknown": 0,
+            "total": 0,
+        }
+        self.samples_with_gc_frames = 0
+
+        # Per-thread statistics
+        self.per_thread_stats = {}  # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}}
+
+    def collect(self, stack_frames, skip_idle=False):
+        """Override to track thread status statistics before processing frames."""
+        # One increment per collect() call: this counts samples, not individual thread traces
+        self._sample_count += 1
+
+        # Helper (defined outside this hunk) returns aggregate counts, a GC-frame flag, and per-thread counts
+        status_counts, has_gc_frame, per_thread_stats = self._collect_thread_status_stats(stack_frames)
+
+        # Add this sample's counts to the running totals; keys must already exist in thread_status_counts
+        for key in status_counts:
+            self.thread_status_counts[key] += status_counts[key]
+
+        # Counts samples (not threads) with a GC frame: at most +1 per collect() call
+        if has_gc_frame:
+            self.samples_with_gc_frames += 1
+
+        # Merge per-thread statistics, creating a zeroed entry the first time a thread_id is seen
+        for thread_id, stats in per_thread_stats.items():
+            if thread_id not in self.per_thread_stats:
+                self.per_thread_stats[thread_id] = {
+                    "has_gil": 0,
+                    "on_cpu": 0,
+                    "gil_requested": 0,
+                    "unknown": 0,
+                    "total": 0,
+                    "gc_samples": 0,
+                }
+            for key, value in stats.items():
+                self.per_thread_stats[thread_id][key] += value
+
+        # Statistics are updated first; the parent collect() then does the frame processing
+        super().collect(stack_frames, skip_idle=skip_idle)
+
+    def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, mode=None):
        """Set profiling statistics to include in flamegraph data."""
        self.stats = {
            "sample_interval_usec": sample_interval_usec,
            "duration_sec": duration_sec,
            "sample_rate": sample_rate,
-            "error_rate": error_rate
+            "error_rate": error_rate,
+            "mode": mode  # caller-supplied mode value; stays None when the argument is omitted
        }

    def export(self, filename):
@ -117,7 +165,6 @@ def _format_function_name(func):
        return f"{funcname} ({filename}:{lineno})"

    def _convert_to_flamegraph_format(self):
-        """Convert aggregated trie to d3-flamegraph format with string table optimization."""
        if self._total_samples == 0:
            return {
                "name": self._string_table.intern("No Data"),
@ -178,6 +225,29 @@ def convert_children(children, min_samples):
                "strings": self._string_table.get_strings()
            }

+        # Calculate thread status percentages for display
+        total_threads = max(1, self.thread_status_counts["total"])
+        thread_stats = {
+            "has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100,
+            "on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100,
+            "gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100,
+            "gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100,
+            **self.thread_status_counts
+        }
+
+        # Calculate per-thread statistics with percentages
+        per_thread_stats_with_pct = {}
+        total_samples_denominator = max(1, self._sample_count)
+        for thread_id, stats in self.per_thread_stats.items():
+            total = max(1, stats["total"])
+            per_thread_stats_with_pct[thread_id] = {
+                "has_gil_pct": (stats["has_gil"] / total) * 100,
+                "on_cpu_pct": (stats["on_cpu"] / total) * 100,
+                "gil_requested_pct": (stats["gil_requested"] / total) * 100,
+                "gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100,
+                **stats
+            }
+
        # If we only have one root child, make it the root to avoid redundant level
        if len(root_children) == 1:
            main_child = root_children[0]
@ -185,7 +255,11 @@ def convert_children(children, min_samples):
            old_name = self._string_table.get_string(main_child["name"])
            new_name = f"Program Root: {old_name}"
            main_child["name"] = self._string_table.intern(new_name)
-            main_child["stats"] = self.stats
+            main_child["stats"] = {
+                **self.stats,
+                "thread_stats": thread_stats,
+                "per_thread_stats": per_thread_stats_with_pct
+            }
            main_child["threads"] = sorted(list(self._all_threads))
            main_child["strings"] = self._string_table.get_strings()
            return main_child
@ -194,7 +268,11 @@ def convert_children(children, min_samples):
            "name": self._string_table.intern("Program Root"),
            "value": total_samples,
            "children": root_children,
-            "stats": self.stats,
+            "stats": {
+                **self.stats,
+                "thread_stats": thread_stats,
+                "per_thread_stats": per_thread_stats_with_pct
+            },
            "threads": sorted(list(self._all_threads)),
            "strings": self._string_table.get_strings()
        }