mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
gh-142374: Fix recursive function cumulative over-counting in sampling profiler
The sampling profiler counted every frame occurrence in a stack for cumulative statistics. For recursive functions appearing N times in a stack, this meant counting N instead of 1, causing cumul% to exceed 100%. A function recursing 500 deep in every sample would show 50000% cumulative presence. The fix tracks seen locations per sample using a reused set, ensuring each unique (filename, lineno, funcname) is counted once per sample. This matches the expected semantics: cumul% represents the percentage of samples where a function appeared on the stack, not the sum of all frame occurrences.
This commit is contained in:
parent
c5b37228af
commit
78b8bd521f
7 changed files with 304 additions and 18 deletions
|
|
@ -477,6 +477,10 @@ def __init__(self, *args, **kwargs):
|
||||||
# File index (populated during export)
|
# File index (populated during export)
|
||||||
self.file_index = {}
|
self.file_index = {}
|
||||||
|
|
||||||
|
# Reusable set for deduplicating line locations within a single sample.
|
||||||
|
# This avoids over-counting recursive functions in cumulative stats.
|
||||||
|
self._seen_lines = set()
|
||||||
|
|
||||||
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs):
|
def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs):
|
||||||
"""Set profiling statistics to include in heatmap output.
|
"""Set profiling statistics to include in heatmap output.
|
||||||
|
|
||||||
|
|
@ -509,6 +513,7 @@ def process_frames(self, frames, thread_id):
|
||||||
thread_id: Thread ID for this stack trace
|
thread_id: Thread ID for this stack trace
|
||||||
"""
|
"""
|
||||||
self._total_samples += 1
|
self._total_samples += 1
|
||||||
|
self._seen_lines.clear()
|
||||||
|
|
||||||
# Count each line in the stack and build call graph
|
# Count each line in the stack and build call graph
|
||||||
for i, frame_info in enumerate(frames):
|
for i, frame_info in enumerate(frames):
|
||||||
|
|
@ -519,7 +524,13 @@ def process_frames(self, frames, thread_id):
|
||||||
|
|
||||||
# frames[0] is the leaf - where execution is actually happening
|
# frames[0] is the leaf - where execution is actually happening
|
||||||
is_leaf = (i == 0)
|
is_leaf = (i == 0)
|
||||||
self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf)
|
line_key = (filename, lineno)
|
||||||
|
count_cumulative = line_key not in self._seen_lines
|
||||||
|
if count_cumulative:
|
||||||
|
self._seen_lines.add(line_key)
|
||||||
|
|
||||||
|
self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf,
|
||||||
|
count_cumulative=count_cumulative)
|
||||||
|
|
||||||
# Build call graph for adjacent frames
|
# Build call graph for adjacent frames
|
||||||
if i + 1 < len(frames):
|
if i + 1 < len(frames):
|
||||||
|
|
@ -537,11 +548,13 @@ def _is_valid_frame(self, filename, lineno):
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False):
|
def _record_line_sample(self, filename, lineno, funcname, is_leaf=False,
|
||||||
|
count_cumulative=True):
|
||||||
"""Record a sample for a specific line."""
|
"""Record a sample for a specific line."""
|
||||||
# Track cumulative samples (all occurrences in stack)
|
# Track cumulative samples (all occurrences in stack)
|
||||||
self.line_samples[(filename, lineno)] += 1
|
if count_cumulative:
|
||||||
self.file_samples[filename][lineno] += 1
|
self.line_samples[(filename, lineno)] += 1
|
||||||
|
self.file_samples[filename][lineno] += 1
|
||||||
|
|
||||||
# Track self/leaf samples (only when at top of stack)
|
# Track self/leaf samples (only when at top of stack)
|
||||||
if is_leaf:
|
if is_leaf:
|
||||||
|
|
|
||||||
|
|
@ -190,6 +190,8 @@ def __init__(
|
||||||
# Trend tracking (initialized after colors are set up)
|
# Trend tracking (initialized after colors are set up)
|
||||||
self._trend_tracker = None
|
self._trend_tracker = None
|
||||||
|
|
||||||
|
self._seen_locations = set()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def elapsed_time(self):
|
def elapsed_time(self):
|
||||||
"""Get the elapsed time, frozen when finished."""
|
"""Get the elapsed time, frozen when finished."""
|
||||||
|
|
@ -285,13 +287,16 @@ def process_frames(self, frames, thread_id=None):
|
||||||
|
|
||||||
# Get per-thread data if tracking per-thread
|
# Get per-thread data if tracking per-thread
|
||||||
thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None
|
thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None
|
||||||
|
self._seen_locations.clear()
|
||||||
|
|
||||||
# Process each frame in the stack to track cumulative calls
|
# Process each frame in the stack to track cumulative calls
|
||||||
for frame in frames:
|
for frame in frames:
|
||||||
location = (frame.filename, frame.lineno, frame.funcname)
|
location = (frame.filename, frame.lineno, frame.funcname)
|
||||||
self.result[location]["cumulative_calls"] += 1
|
if location not in self._seen_locations:
|
||||||
if thread_data:
|
self._seen_locations.add(location)
|
||||||
thread_data.result[location]["cumulative_calls"] += 1
|
self.result[location]["cumulative_calls"] += 1
|
||||||
|
if thread_data:
|
||||||
|
thread_data.result[location]["cumulative_calls"] += 1
|
||||||
|
|
||||||
# The top frame gets counted as an inline call (directly executing)
|
# The top frame gets counted as an inline call (directly executing)
|
||||||
top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname)
|
top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname)
|
||||||
|
|
|
||||||
|
|
@ -16,16 +16,21 @@ def __init__(self, sample_interval_usec, *, skip_idle=False):
|
||||||
lambda: collections.defaultdict(int)
|
lambda: collections.defaultdict(int)
|
||||||
)
|
)
|
||||||
self.skip_idle = skip_idle
|
self.skip_idle = skip_idle
|
||||||
|
self._seen_locations = set()
|
||||||
|
|
||||||
def _process_frames(self, frames):
|
def _process_frames(self, frames):
|
||||||
"""Process a single thread's frame stack."""
|
"""Process a single thread's frame stack."""
|
||||||
if not frames:
|
if not frames:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
self._seen_locations.clear()
|
||||||
|
|
||||||
# Process each frame in the stack to track cumulative calls
|
# Process each frame in the stack to track cumulative calls
|
||||||
for frame in frames:
|
for frame in frames:
|
||||||
location = (frame.filename, frame.lineno, frame.funcname)
|
location = (frame.filename, frame.lineno, frame.funcname)
|
||||||
self.result[location]["cumulative_calls"] += 1
|
if location not in self._seen_locations:
|
||||||
|
self._seen_locations.add(location)
|
||||||
|
self.result[location]["cumulative_calls"] += 1
|
||||||
|
|
||||||
# The top frame gets counted as an inline call (directly executing)
|
# The top frame gets counted as an inline call (directly executing)
|
||||||
top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname)
|
top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname)
|
||||||
|
|
|
||||||
|
|
@ -85,7 +85,7 @@ def test_pstats_collector_with_extreme_intervals_and_empty_data(self):
|
||||||
# Should still process the frames
|
# Should still process the frames
|
||||||
self.assertEqual(len(collector.result), 1)
|
self.assertEqual(len(collector.result), 1)
|
||||||
|
|
||||||
# Test collecting duplicate frames in same sample
|
# Test collecting duplicate frames in same sample (recursive function)
|
||||||
test_frames = [
|
test_frames = [
|
||||||
MockInterpreterInfo(
|
MockInterpreterInfo(
|
||||||
0, # interpreter_id
|
0, # interpreter_id
|
||||||
|
|
@ -94,7 +94,7 @@ def test_pstats_collector_with_extreme_intervals_and_empty_data(self):
|
||||||
1,
|
1,
|
||||||
[
|
[
|
||||||
MockFrameInfo("file.py", 10, "func1"),
|
MockFrameInfo("file.py", 10, "func1"),
|
||||||
MockFrameInfo("file.py", 10, "func1"), # Duplicate
|
MockFrameInfo("file.py", 10, "func1"), # Duplicate (recursion)
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
|
|
@ -102,9 +102,9 @@ def test_pstats_collector_with_extreme_intervals_and_empty_data(self):
|
||||||
]
|
]
|
||||||
collector = PstatsCollector(sample_interval_usec=1000)
|
collector = PstatsCollector(sample_interval_usec=1000)
|
||||||
collector.collect(test_frames)
|
collector.collect(test_frames)
|
||||||
# Should count both occurrences
|
# Should count only once per sample to avoid over-counting recursive functions
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
collector.result[("file.py", 10, "func1")]["cumulative_calls"], 2
|
collector.result[("file.py", 10, "func1")]["cumulative_calls"], 1
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_pstats_collector_single_frame_stacks(self):
|
def test_pstats_collector_single_frame_stacks(self):
|
||||||
|
|
@ -1201,3 +1201,194 @@ def test_flamegraph_collector_per_thread_gc_percentage(self):
|
||||||
self.assertEqual(collector.per_thread_stats[2]["gc_samples"], 1)
|
self.assertEqual(collector.per_thread_stats[2]["gc_samples"], 1)
|
||||||
self.assertEqual(collector.per_thread_stats[2]["total"], 6)
|
self.assertEqual(collector.per_thread_stats[2]["total"], 6)
|
||||||
self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1)
|
self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRecursiveFunctionHandling(unittest.TestCase):
|
||||||
|
"""Tests for correct handling of recursive functions in cumulative stats."""
|
||||||
|
|
||||||
|
def test_pstats_collector_recursive_function_single_sample(self):
|
||||||
|
"""Test that recursive functions are counted once per sample, not per occurrence."""
|
||||||
|
collector = PstatsCollector(sample_interval_usec=1000)
|
||||||
|
|
||||||
|
# Simulate a recursive function appearing 5 times in one sample
|
||||||
|
recursive_frames = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
collector.collect(recursive_frames)
|
||||||
|
|
||||||
|
location = ("test.py", 10, "recursive_func")
|
||||||
|
# Should count as 1 cumulative call (present in 1 sample), not 5
|
||||||
|
self.assertEqual(collector.result[location]["cumulative_calls"], 1)
|
||||||
|
# Direct calls should be 1 (top of stack)
|
||||||
|
self.assertEqual(collector.result[location]["direct_calls"], 1)
|
||||||
|
|
||||||
|
def test_pstats_collector_recursive_function_multiple_samples(self):
|
||||||
|
"""Test cumulative counting across multiple samples with recursion."""
|
||||||
|
collector = PstatsCollector(sample_interval_usec=1000)
|
||||||
|
|
||||||
|
# Sample 1: recursive function at depth 3
|
||||||
|
sample1 = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
# Sample 2: recursive function at depth 2
|
||||||
|
sample2 = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
# Sample 3: recursive function at depth 4
|
||||||
|
sample3 = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
collector.collect(sample1)
|
||||||
|
collector.collect(sample2)
|
||||||
|
collector.collect(sample3)
|
||||||
|
|
||||||
|
location = ("test.py", 10, "recursive_func")
|
||||||
|
# Should count as 3 cumulative calls (present in 3 samples)
|
||||||
|
# Not 3+2+4=9 which would be the buggy behavior
|
||||||
|
self.assertEqual(collector.result[location]["cumulative_calls"], 3)
|
||||||
|
self.assertEqual(collector.result[location]["direct_calls"], 3)
|
||||||
|
|
||||||
|
def test_pstats_collector_mixed_recursive_and_nonrecursive(self):
|
||||||
|
"""Test a call stack with both recursive and non-recursive functions."""
|
||||||
|
collector = PstatsCollector(sample_interval_usec=1000)
|
||||||
|
|
||||||
|
# Stack: main -> foo (recursive x3) -> bar
|
||||||
|
frames = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[
|
||||||
|
MockFrameInfo("test.py", 50, "bar"), # top of stack
|
||||||
|
MockFrameInfo("test.py", 20, "foo"), # recursive
|
||||||
|
MockFrameInfo("test.py", 20, "foo"), # recursive
|
||||||
|
MockFrameInfo("test.py", 20, "foo"), # recursive
|
||||||
|
MockFrameInfo("test.py", 10, "main"), # bottom
|
||||||
|
],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
collector.collect(frames)
|
||||||
|
|
||||||
|
# bar: 1 cumulative (in stack), 1 direct (top)
|
||||||
|
self.assertEqual(collector.result[("test.py", 50, "bar")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 50, "bar")]["direct_calls"], 1)
|
||||||
|
|
||||||
|
# foo: 1 cumulative (counted once despite 3 occurrences), 0 direct
|
||||||
|
self.assertEqual(collector.result[("test.py", 20, "foo")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 20, "foo")]["direct_calls"], 0)
|
||||||
|
|
||||||
|
# main: 1 cumulative, 0 direct
|
||||||
|
self.assertEqual(collector.result[("test.py", 10, "main")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 10, "main")]["direct_calls"], 0)
|
||||||
|
|
||||||
|
def test_pstats_collector_cumulative_percentage_cannot_exceed_100(self):
|
||||||
|
"""Test that cumulative percentage stays <= 100% even with deep recursion."""
|
||||||
|
collector = PstatsCollector(sample_interval_usec=1000000) # 1 second for easy math
|
||||||
|
|
||||||
|
# Collect 10 samples, each with recursive function at depth 100
|
||||||
|
for _ in range(10):
|
||||||
|
frames = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[MockFrameInfo("test.py", 10, "deep_recursive")] * 100,
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
collector.collect(frames)
|
||||||
|
|
||||||
|
location = ("test.py", 10, "deep_recursive")
|
||||||
|
# Cumulative calls should be 10 (number of samples), not 1000
|
||||||
|
self.assertEqual(collector.result[location]["cumulative_calls"], 10)
|
||||||
|
|
||||||
|
# Verify stats calculation gives correct percentage
|
||||||
|
collector.create_stats()
|
||||||
|
stats = collector.stats[location]
|
||||||
|
# stats format: (direct_calls, cumulative_calls, total_time, cumulative_time, callers)
|
||||||
|
cumulative_calls = stats[1]
|
||||||
|
self.assertEqual(cumulative_calls, 10)
|
||||||
|
|
||||||
|
def test_pstats_collector_different_lines_same_function_counted_separately(self):
|
||||||
|
"""Test that different line numbers in same function are tracked separately."""
|
||||||
|
collector = PstatsCollector(sample_interval_usec=1000)
|
||||||
|
|
||||||
|
# Function with multiple line numbers (e.g., different call sites within recursion)
|
||||||
|
frames = [
|
||||||
|
MockInterpreterInfo(
|
||||||
|
0,
|
||||||
|
[
|
||||||
|
MockThreadInfo(
|
||||||
|
1,
|
||||||
|
[
|
||||||
|
MockFrameInfo("test.py", 15, "func"), # line 15
|
||||||
|
MockFrameInfo("test.py", 12, "func"), # line 12
|
||||||
|
MockFrameInfo("test.py", 15, "func"), # line 15 again
|
||||||
|
MockFrameInfo("test.py", 10, "func"), # line 10
|
||||||
|
],
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
collector.collect(frames)
|
||||||
|
|
||||||
|
# Each unique (file, line, func) should be counted once
|
||||||
|
self.assertEqual(collector.result[("test.py", 15, "func")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 12, "func")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 10, "func")]["cumulative_calls"], 1)
|
||||||
|
|
|
||||||
|
|
@ -118,16 +118,17 @@ def test_recursive_function_call_counting(self):
|
||||||
self.assertIn(fib_key, collector.stats)
|
self.assertIn(fib_key, collector.stats)
|
||||||
self.assertIn(main_key, collector.stats)
|
self.assertIn(main_key, collector.stats)
|
||||||
|
|
||||||
# Fibonacci should have many calls due to recursion
|
# Fibonacci: counted once per sample, not per occurrence
|
||||||
fib_stats = collector.stats[fib_key]
|
fib_stats = collector.stats[fib_key]
|
||||||
direct_calls, cumulative_calls, tt, ct, callers = fib_stats
|
direct_calls, cumulative_calls, tt, ct, callers = fib_stats
|
||||||
|
|
||||||
# Should have recorded multiple calls (9 total appearances in samples)
|
# Should count 3 (present in 3 samples), not 9 (total occurrences)
|
||||||
self.assertEqual(cumulative_calls, 9)
|
self.assertEqual(cumulative_calls, 3)
|
||||||
self.assertGreater(tt, 0) # Should have some total time
|
self.assertEqual(direct_calls, 3) # Top of stack in all samples
|
||||||
self.assertGreater(ct, 0) # Should have some cumulative time
|
self.assertGreater(tt, 0)
|
||||||
|
self.assertGreater(ct, 0)
|
||||||
|
|
||||||
# Main should have fewer calls
|
# Main should also have 3 cumulative calls (in all 3 samples)
|
||||||
main_stats = collector.stats[main_key]
|
main_stats = collector.stats[main_key]
|
||||||
main_direct_calls, main_cumulative_calls = main_stats[0], main_stats[1]
|
main_direct_calls, main_cumulative_calls = main_stats[0], main_stats[1]
|
||||||
self.assertEqual(main_direct_calls, 0) # Never directly executing
|
self.assertEqual(main_direct_calls, 0) # Never directly executing
|
||||||
|
|
|
||||||
|
|
@ -157,6 +157,70 @@ def test_process_frames_multiple_threads(self):
|
||||||
)
|
)
|
||||||
self.assertNotIn(loc1, collector.per_thread_data[456].result)
|
self.assertNotIn(loc1, collector.per_thread_data[456].result)
|
||||||
|
|
||||||
|
def test_process_recursive_frames_counted_once(self):
|
||||||
|
"""Test that recursive functions are counted once per sample."""
|
||||||
|
collector = LiveStatsCollector(1000)
|
||||||
|
# Simulate recursive function appearing 5 times in stack
|
||||||
|
frames = [
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
]
|
||||||
|
collector.process_frames(frames)
|
||||||
|
|
||||||
|
location = ("test.py", 10, "recursive_func")
|
||||||
|
# Should count as 1 cumulative (present in 1 sample), not 5
|
||||||
|
self.assertEqual(collector.result[location]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[location]["direct_calls"], 1)
|
||||||
|
|
||||||
|
def test_process_recursive_frames_multiple_samples(self):
|
||||||
|
"""Test cumulative counting across multiple samples with recursion."""
|
||||||
|
collector = LiveStatsCollector(1000)
|
||||||
|
|
||||||
|
# Sample 1: depth 3
|
||||||
|
frames1 = [
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
]
|
||||||
|
# Sample 2: depth 2
|
||||||
|
frames2 = [
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
MockFrameInfo("test.py", 10, "recursive_func"),
|
||||||
|
]
|
||||||
|
|
||||||
|
collector.process_frames(frames1)
|
||||||
|
collector.process_frames(frames2)
|
||||||
|
|
||||||
|
location = ("test.py", 10, "recursive_func")
|
||||||
|
# Should count as 2 (present in 2 samples), not 5
|
||||||
|
self.assertEqual(collector.result[location]["cumulative_calls"], 2)
|
||||||
|
self.assertEqual(collector.result[location]["direct_calls"], 2)
|
||||||
|
|
||||||
|
def test_process_mixed_recursive_nonrecursive(self):
|
||||||
|
"""Test stack with both recursive and non-recursive functions."""
|
||||||
|
collector = LiveStatsCollector(1000)
|
||||||
|
|
||||||
|
# Stack: main -> foo (recursive x3) -> bar
|
||||||
|
frames = [
|
||||||
|
MockFrameInfo("test.py", 50, "bar"),
|
||||||
|
MockFrameInfo("test.py", 20, "foo"),
|
||||||
|
MockFrameInfo("test.py", 20, "foo"),
|
||||||
|
MockFrameInfo("test.py", 20, "foo"),
|
||||||
|
MockFrameInfo("test.py", 10, "main"),
|
||||||
|
]
|
||||||
|
collector.process_frames(frames)
|
||||||
|
|
||||||
|
# foo: 1 cumulative despite 3 occurrences
|
||||||
|
self.assertEqual(collector.result[("test.py", 20, "foo")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 20, "foo")]["direct_calls"], 0)
|
||||||
|
|
||||||
|
# bar and main: 1 cumulative each
|
||||||
|
self.assertEqual(collector.result[("test.py", 50, "bar")]["cumulative_calls"], 1)
|
||||||
|
self.assertEqual(collector.result[("test.py", 10, "main")]["cumulative_calls"], 1)
|
||||||
|
|
||||||
|
|
||||||
class TestLiveStatsCollectorCollect(unittest.TestCase):
|
class TestLiveStatsCollectorCollect(unittest.TestCase):
|
||||||
"""Tests for the collect method."""
|
"""Tests for the collect method."""
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
Fix cumulative percentage calculation for recursive functions in the new
|
||||||
|
sampling profiler. When profiling recursive functions, cumulative statistics
|
||||||
|
(cumul%, cumtime) could exceed 100% because each recursive frame in a stack
|
||||||
|
was counted separately. For example, a function recursing 500 times in every
|
||||||
|
sample would show 50000% cumulative presence. The fix deduplicates locations
|
||||||
|
within each sample so cumulative stats correctly represent "percentage of
|
||||||
|
samples where this function was on the stack". Patch by Pablo Galindo.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue