mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
gh-138122: Implement frame caching in RemoteUnwinder to reduce memory reads (#142137)
This PR implements frame caching in the RemoteUnwinder class to significantly reduce memory reads when profiling remote processes with deep call stacks. When cache_frames=True, the unwinder stores the frame chain from each sample and reuses unchanged portions in subsequent samples. Since most profiling samples capture similar call stacks (especially the parent frames), this optimization avoids repeatedly reading the same frame data from the target process. The implementation adds a last_profiled_frame field to the thread state that tracks where the previous sample stopped. On the next sample, if the current frame chain reaches this marker, the cached frames from that point onward are reused instead of being re-read from remote memory. The sampling profiler now enables frame caching by default.
This commit is contained in:
parent
332da6295f
commit
572c780aa8
24 changed files with 1855 additions and 142 deletions
|
|
@ -27,21 +27,24 @@
|
|||
|
||||
|
||||
class SampleProfiler:
|
||||
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True):
|
||||
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True, collect_stats=False):
|
||||
self.pid = pid
|
||||
self.sample_interval_usec = sample_interval_usec
|
||||
self.all_threads = all_threads
|
||||
self.mode = mode # Store mode for later use
|
||||
self.collect_stats = collect_stats
|
||||
if _FREE_THREADED_BUILD:
|
||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||
self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
skip_non_matching_threads=skip_non_matching_threads, cache_frames=True,
|
||||
stats=collect_stats
|
||||
)
|
||||
else:
|
||||
only_active_threads = bool(self.all_threads)
|
||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||
self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
skip_non_matching_threads=skip_non_matching_threads, cache_frames=True,
|
||||
stats=collect_stats
|
||||
)
|
||||
# Track sample intervals and total sample count
|
||||
self.sample_intervals = deque(maxlen=100)
|
||||
|
|
@ -129,6 +132,10 @@ def sample(self, collector, duration_sec=10, *, async_aware=False):
|
|||
print(f"Sample rate: {sample_rate:.2f} samples/sec")
|
||||
print(f"Error rate: {error_rate:.2f}%")
|
||||
|
||||
# Print unwinder stats if stats collection is enabled
|
||||
if self.collect_stats:
|
||||
self._print_unwinder_stats()
|
||||
|
||||
# Pass stats to flamegraph collector if it's the right type
|
||||
if hasattr(collector, 'set_stats'):
|
||||
collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, missed_samples, mode=self.mode)
|
||||
|
|
@ -176,17 +183,100 @@ def _print_realtime_stats(self):
|
|||
(1.0 / min_hz) * 1_000_000 if min_hz > 0 else 0
|
||||
) # Max time = Min Hz
|
||||
|
||||
# Build cache stats string if stats collection is enabled
|
||||
cache_stats_str = ""
|
||||
if self.collect_stats:
|
||||
try:
|
||||
stats = self.unwinder.get_stats()
|
||||
hits = stats.get('frame_cache_hits', 0)
|
||||
partial = stats.get('frame_cache_partial_hits', 0)
|
||||
misses = stats.get('frame_cache_misses', 0)
|
||||
total = hits + partial + misses
|
||||
if total > 0:
|
||||
hit_pct = (hits + partial) / total * 100
|
||||
cache_stats_str = f" {ANSIColors.MAGENTA}Cache: {hit_pct:.1f}% ({hits}+{partial}/{misses}){ANSIColors.RESET}"
|
||||
except RuntimeError:
|
||||
pass
|
||||
|
||||
# Clear line and print stats
|
||||
print(
|
||||
f"\r\033[K{ANSIColors.BOLD_BLUE}Real-time sampling stats:{ANSIColors.RESET} "
|
||||
f"{ANSIColors.YELLOW}Mean: {mean_hz:.1f}Hz ({mean_us_per_sample:.2f}µs){ANSIColors.RESET} "
|
||||
f"{ANSIColors.GREEN}Min: {min_hz:.1f}Hz ({max_us_per_sample:.2f}µs){ANSIColors.RESET} "
|
||||
f"{ANSIColors.RED}Max: {max_hz:.1f}Hz ({min_us_per_sample:.2f}µs){ANSIColors.RESET} "
|
||||
f"{ANSIColors.CYAN}Samples: {self.total_samples}{ANSIColors.RESET}",
|
||||
f"\r\033[K{ANSIColors.BOLD_BLUE}Stats:{ANSIColors.RESET} "
|
||||
f"{ANSIColors.YELLOW}{mean_hz:.1f}Hz ({mean_us_per_sample:.1f}µs){ANSIColors.RESET} "
|
||||
f"{ANSIColors.GREEN}Min: {min_hz:.1f}Hz{ANSIColors.RESET} "
|
||||
f"{ANSIColors.RED}Max: {max_hz:.1f}Hz{ANSIColors.RESET} "
|
||||
f"{ANSIColors.CYAN}N={self.total_samples}{ANSIColors.RESET}"
|
||||
f"{cache_stats_str}",
|
||||
end="",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
def _print_unwinder_stats(self):
    """Print the unwinder's counters as a sectioned report on stdout.

    The counters come from ``self.unwinder.get_stats()``. Sections are
    printed in this order: frame cache hit/miss breakdown, frames read
    from cache versus memory, code object cache hits/misses, and memory
    read operations with a human-readable byte total. A final line with
    the stale cache invalidation count is printed only when that count
    is greater than zero.

    If ``get_stats()`` raises ``RuntimeError`` the method returns without
    printing anything.
    """
    try:
        counters = self.unwinder.get_stats()
    except RuntimeError:
        # Stats not enabled on the unwinder; there is nothing to report.
        return

    def percent_of(part, whole):
        # Share of `whole` taken by `part`, or 0 when `whole` is not positive
        # (avoids dividing by zero when no events were recorded).
        return (part / whole * 100) if whole > 0 else 0

    # Report banner.
    print(f"\n{ANSIColors.BOLD_BLUE}{'='*50}{ANSIColors.RESET}")
    print(f"{ANSIColors.BOLD_BLUE}Unwinder Statistics:{ANSIColors.RESET}")

    # --- Frame cache: full hits, partial hits and misses ---
    sample_count = counters.get('total_samples', 0)
    full_hits = counters.get('frame_cache_hits', 0)
    partial_hits = counters.get('frame_cache_partial_hits', 0)
    cache_misses = counters.get('frame_cache_misses', 0)
    lookups = full_hits + partial_hits + cache_misses

    full_pct = percent_of(full_hits, lookups)
    partial_pct = percent_of(partial_hits, lookups)
    miss_pct = percent_of(cache_misses, lookups)

    print(f" {ANSIColors.CYAN}Frame Cache:{ANSIColors.RESET}")
    print(f" Total samples: {sample_count:,}")
    print(f" Full hits: {full_hits:,} ({ANSIColors.GREEN}{full_pct:.1f}%{ANSIColors.RESET})")
    print(f" Partial hits: {partial_hits:,} ({ANSIColors.YELLOW}{partial_pct:.1f}%{ANSIColors.RESET})")
    print(f" Misses: {cache_misses:,} ({ANSIColors.RED}{miss_pct:.1f}%{ANSIColors.RESET})")

    # --- Frame reads: served from the cache vs. read from memory ---
    cached_frames = counters.get('frames_read_from_cache', 0)
    fetched_frames = counters.get('frames_read_from_memory', 0)
    all_frames = cached_frames + fetched_frames
    cached_pct = percent_of(cached_frames, all_frames)
    fetched_pct = percent_of(fetched_frames, all_frames)

    print(f" {ANSIColors.CYAN}Frame Reads:{ANSIColors.RESET}")
    print(f" From cache: {cached_frames:,} ({ANSIColors.GREEN}{cached_pct:.1f}%{ANSIColors.RESET})")
    print(f" From memory: {fetched_frames:,} ({ANSIColors.RED}{fetched_pct:.1f}%{ANSIColors.RESET})")

    # --- Code object cache ---
    code_hit_count = counters.get('code_object_cache_hits', 0)
    code_miss_count = counters.get('code_object_cache_misses', 0)
    code_lookups = code_hit_count + code_miss_count
    code_hit_pct = percent_of(code_hit_count, code_lookups)
    code_miss_pct = percent_of(code_miss_count, code_lookups)

    print(f" {ANSIColors.CYAN}Code Object Cache:{ANSIColors.RESET}")
    print(f" Hits: {code_hit_count:,} ({ANSIColors.GREEN}{code_hit_pct:.1f}%{ANSIColors.RESET})")
    print(f" Misses: {code_miss_count:,} ({ANSIColors.RED}{code_miss_pct:.1f}%{ANSIColors.RESET})")

    # --- Memory operations ---
    read_ops = counters.get('memory_reads', 0)
    bytes_read = counters.get('memory_bytes_read', 0)

    # Pick the largest unit (B, KB, MB; 1024-based) the byte total reaches.
    one_kb = 1024
    one_mb = 1024 * 1024
    if bytes_read < one_kb:
        size_text = f"{bytes_read} B"
    elif bytes_read < one_mb:
        size_text = f"{bytes_read / one_kb:.1f} KB"
    else:
        size_text = f"{bytes_read / one_mb:.1f} MB"

    print(f" {ANSIColors.CYAN}Memory:{ANSIColors.RESET}")
    print(f" Read operations: {read_ops:,} ({size_text})")

    # --- Stale invalidations: only reported when at least one occurred ---
    stale_count = counters.get('stale_cache_invalidations', 0)
    if stale_count > 0:
        print(f" {ANSIColors.YELLOW}Stale cache invalidations: {stale_count}{ANSIColors.RESET}")
|
||||
|
||||
|
||||
def sample(
|
||||
pid,
|
||||
|
|
@ -234,7 +324,8 @@ def sample(
|
|||
mode=mode,
|
||||
native=native,
|
||||
gc=gc,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
skip_non_matching_threads=skip_non_matching_threads,
|
||||
collect_stats=realtime_stats,
|
||||
)
|
||||
profiler.realtime_stats = realtime_stats
|
||||
|
||||
|
|
@ -290,7 +381,8 @@ def sample_live(
|
|||
mode=mode,
|
||||
native=native,
|
||||
gc=gc,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
skip_non_matching_threads=skip_non_matching_threads,
|
||||
collect_stats=realtime_stats,
|
||||
)
|
||||
profiler.realtime_stats = realtime_stats
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue