mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
gh-142374: Fix recursive function cumulative over-counting in sampling profiler
The sampling profiler counted every frame occurrence in a stack for cumulative statistics. For recursive functions appearing N times in a stack, this meant counting N instead of 1, causing cumul% to exceed 100%. A function recursing 500 deep in every sample would show 50000% cumulative presence. The fix tracks seen locations per sample using a reused set, ensuring each unique (filename, lineno, funcname) is counted once per sample. This matches the expected semantics: cumul% represents the percentage of samples where a function appeared on the stack, not the sum of all frame occurrences.
This commit is contained in:
parent
c5b37228af
commit
78b8bd521f
7 changed files with 304 additions and 18 deletions
|
|
@ -477,6 +477,10 @@ def __init__(self, *args, **kwargs):
|
|||
# File index (populated during export)
|
||||
self.file_index = {}
|
||||
|
||||
# Reusable set for deduplicating line locations within a single sample.
|
||||
# This avoids over-counting recursive functions in cumulative stats.
|
||||
self._seen_lines = set()
|
||||
|
||||
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs):
|
||||
"""Set profiling statistics to include in heatmap output.
|
||||
|
||||
|
|
@ -509,6 +513,7 @@ def process_frames(self, frames, thread_id):
|
|||
thread_id: Thread ID for this stack trace
|
||||
"""
|
||||
self._total_samples += 1
|
||||
self._seen_lines.clear()
|
||||
|
||||
# Count each line in the stack and build call graph
|
||||
for i, frame_info in enumerate(frames):
|
||||
|
|
@ -519,7 +524,13 @@ def process_frames(self, frames, thread_id):
|
|||
|
||||
# frames[0] is the leaf - where execution is actually happening
|
||||
is_leaf = (i == 0)
|
||||
self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf)
|
||||
line_key = (filename, lineno)
|
||||
count_cumulative = line_key not in self._seen_lines
|
||||
if count_cumulative:
|
||||
self._seen_lines.add(line_key)
|
||||
|
||||
self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf,
|
||||
count_cumulative=count_cumulative)
|
||||
|
||||
# Build call graph for adjacent frames
|
||||
if i + 1 < len(frames):
|
||||
|
|
@ -537,11 +548,13 @@ def _is_valid_frame(self, filename, lineno):
|
|||
|
||||
return True
|
||||
|
||||
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False):
|
||||
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False,
|
||||
count_cumulative=True):
|
||||
"""Record a sample for a specific line."""
|
||||
# Track cumulative samples (skipped when the caller already counted this line in the current sample)
|
||||
self.line_samples[(filename, lineno)] += 1
|
||||
self.file_samples[filename][lineno] += 1
|
||||
if count_cumulative:
|
||||
self.line_samples[(filename, lineno)] += 1
|
||||
self.file_samples[filename][lineno] += 1
|
||||
|
||||
# Track self/leaf samples (only when at top of stack)
|
||||
if is_leaf:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue