mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
gh-135953: Add GIL contention markers to sampling profiler Gecko format (#139485)
This commit enhances the Gecko format reporter in the sampling profiler to include markers for GIL acquisition events.
This commit is contained in:
parent
994ab5c922
commit
89a914c58d
9 changed files with 627 additions and 82 deletions
|
|
@ -1,17 +1,14 @@
|
|||
from abc import ABC, abstractmethod
|
||||
|
||||
# Enums are slow
|
||||
THREAD_STATE_RUNNING = 0
|
||||
THREAD_STATE_IDLE = 1
|
||||
THREAD_STATE_GIL_WAIT = 2
|
||||
THREAD_STATE_UNKNOWN = 3
|
||||
|
||||
STATUS = {
|
||||
THREAD_STATE_RUNNING: "running",
|
||||
THREAD_STATE_IDLE: "idle",
|
||||
THREAD_STATE_GIL_WAIT: "gil_wait",
|
||||
THREAD_STATE_UNKNOWN: "unknown",
|
||||
}
|
||||
# Thread status flags
|
||||
try:
|
||||
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
|
||||
except ImportError:
|
||||
# Fallback for tests or when module is not available
|
||||
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||
THREAD_STATUS_UNKNOWN = (1 << 2)
|
||||
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
|
||||
|
||||
class Collector(ABC):
|
||||
@abstractmethod
|
||||
|
|
@ -26,8 +23,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
|
|||
"""Iterate over all frame stacks from all interpreters and threads."""
|
||||
for interpreter_info in stack_frames:
|
||||
for thread_info in interpreter_info.threads:
|
||||
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
|
||||
continue
|
||||
# skip_idle now means: skip if thread is not actively running
|
||||
# A thread is "active" if it has the GIL OR is on CPU
|
||||
if skip_idle:
|
||||
status_flags = thread_info.status
|
||||
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
|
||||
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
|
||||
if not (has_gil or on_cpu):
|
||||
continue
|
||||
frames = thread_info.frame_info
|
||||
if frames:
|
||||
yield frames, thread_info.thread_id
|
||||
|
|
|
|||
|
|
@ -1,9 +1,20 @@
|
|||
import itertools
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
from .collector import Collector, THREAD_STATE_RUNNING
|
||||
from .collector import Collector
|
||||
try:
|
||||
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
|
||||
except ImportError:
|
||||
# Fallback if module not available (shouldn't happen in normal use)
|
||||
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||
THREAD_STATUS_UNKNOWN = (1 << 2)
|
||||
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
|
||||
|
||||
|
||||
# Categories matching Firefox Profiler expectations
|
||||
|
|
@ -11,14 +22,20 @@
|
|||
{"name": "Other", "color": "grey", "subcategories": ["Other"]},
|
||||
{"name": "Python", "color": "yellow", "subcategories": ["Other"]},
|
||||
{"name": "Native", "color": "blue", "subcategories": ["Other"]},
|
||||
{"name": "Idle", "color": "transparent", "subcategories": ["Other"]},
|
||||
{"name": "GC", "color": "orange", "subcategories": ["Other"]},
|
||||
{"name": "GIL", "color": "green", "subcategories": ["Other"]},
|
||||
{"name": "CPU", "color": "purple", "subcategories": ["Other"]},
|
||||
{"name": "Code Type", "color": "red", "subcategories": ["Other"]},
|
||||
]
|
||||
|
||||
# Category indices
|
||||
CATEGORY_OTHER = 0
|
||||
CATEGORY_PYTHON = 1
|
||||
CATEGORY_NATIVE = 2
|
||||
CATEGORY_IDLE = 3
|
||||
CATEGORY_GC = 3
|
||||
CATEGORY_GIL = 4
|
||||
CATEGORY_CPU = 5
|
||||
CATEGORY_CODE_TYPE = 6
|
||||
|
||||
# Subcategory indices
|
||||
DEFAULT_SUBCATEGORY = 0
|
||||
|
|
@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
|
|||
self.last_sample_time = 0
|
||||
self.interval = 1.0 # Will be calculated from actual sampling
|
||||
|
||||
# State tracking for interval markers (tid -> start_time)
|
||||
self.has_gil_start = {} # Thread has the GIL
|
||||
self.no_gil_start = {} # Thread doesn't have the GIL
|
||||
self.on_cpu_start = {} # Thread is running on CPU
|
||||
self.off_cpu_start = {} # Thread is off CPU
|
||||
self.python_code_start = {} # Thread running Python code (has GIL)
|
||||
self.native_code_start = {} # Thread running native code (on CPU without GIL)
|
||||
self.gil_wait_start = {} # Thread waiting for GIL
|
||||
|
||||
# GC event tracking: track GC start time per thread
|
||||
self.gc_start_per_thread = {} # tid -> start_time
|
||||
|
||||
# Track which threads have been initialized for state tracking
|
||||
self.initialized_threads = set()
|
||||
|
||||
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
|
||||
active_name, inactive_name, category, current_time):
|
||||
"""Track binary state transitions and emit markers.
|
||||
|
||||
Args:
|
||||
tid: Thread ID
|
||||
condition: Whether the active state is true
|
||||
active_dict: Dict tracking start time of active state
|
||||
inactive_dict: Dict tracking start time of inactive state
|
||||
active_name: Name for active state marker
|
||||
inactive_name: Name for inactive state marker
|
||||
category: Gecko category for the markers
|
||||
current_time: Current timestamp
|
||||
"""
|
||||
# On first observation of a thread, just record the current state
|
||||
# without creating a marker (we don't know what the previous state was)
|
||||
if tid not in self.initialized_threads:
|
||||
if condition:
|
||||
active_dict[tid] = current_time
|
||||
else:
|
||||
inactive_dict[tid] = current_time
|
||||
return
|
||||
|
||||
# For already-initialized threads, track transitions
|
||||
if condition:
|
||||
active_dict.setdefault(tid, current_time)
|
||||
if tid in inactive_dict:
|
||||
self._add_marker(tid, inactive_name, inactive_dict.pop(tid),
|
||||
current_time, category)
|
||||
else:
|
||||
inactive_dict.setdefault(tid, current_time)
|
||||
if tid in active_dict:
|
||||
self._add_marker(tid, active_name, active_dict.pop(tid),
|
||||
current_time, category)
|
||||
|
||||
def collect(self, stack_frames):
|
||||
"""Collect a sample from stack frames."""
|
||||
current_time = (time.time() * 1000) - self.start_time
|
||||
|
|
@ -69,19 +136,12 @@ def collect(self, stack_frames):
|
|||
) / self.sample_count
|
||||
self.last_sample_time = current_time
|
||||
|
||||
# Process threads and track GC per thread
|
||||
for interpreter_info in stack_frames:
|
||||
for thread_info in interpreter_info.threads:
|
||||
if (
|
||||
self.skip_idle
|
||||
and thread_info.status != THREAD_STATE_RUNNING
|
||||
):
|
||||
continue
|
||||
|
||||
frames = thread_info.frame_info
|
||||
if not frames:
|
||||
continue
|
||||
|
||||
tid = thread_info.thread_id
|
||||
gc_collecting = thread_info.gc_collecting
|
||||
|
||||
# Initialize thread if needed
|
||||
if tid not in self.threads:
|
||||
|
|
@ -89,6 +149,80 @@ def collect(self, stack_frames):
|
|||
|
||||
thread_data = self.threads[tid]
|
||||
|
||||
# Decode status flags
|
||||
status_flags = thread_info.status
|
||||
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
|
||||
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
|
||||
gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)
|
||||
|
||||
# Track GIL possession (Has GIL / No GIL)
|
||||
self._track_state_transition(
|
||||
tid, has_gil, self.has_gil_start, self.no_gil_start,
|
||||
"Has GIL", "No GIL", CATEGORY_GIL, current_time
|
||||
)
|
||||
|
||||
# Track CPU state (On CPU / Off CPU)
|
||||
self._track_state_transition(
|
||||
tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
|
||||
"On CPU", "Off CPU", CATEGORY_CPU, current_time
|
||||
)
|
||||
|
||||
# Track code type (Python Code / Native Code)
|
||||
# This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
|
||||
if has_gil:
|
||||
self._track_state_transition(
|
||||
tid, True, self.python_code_start, self.native_code_start,
|
||||
"Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
|
||||
)
|
||||
elif on_cpu:
|
||||
self._track_state_transition(
|
||||
tid, True, self.native_code_start, self.python_code_start,
|
||||
"Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
|
||||
)
|
||||
else:
|
||||
# Thread is idle (neither has GIL nor on CPU) - close any open code markers
|
||||
# This handles the third state that _track_state_transition doesn't cover
|
||||
if tid in self.initialized_threads:
|
||||
if tid in self.python_code_start:
|
||||
self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
|
||||
current_time, CATEGORY_CODE_TYPE)
|
||||
if tid in self.native_code_start:
|
||||
self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
|
||||
current_time, CATEGORY_CODE_TYPE)
|
||||
|
||||
# Track "Waiting for GIL" intervals (one-sided tracking)
|
||||
if gil_requested:
|
||||
self.gil_wait_start.setdefault(tid, current_time)
|
||||
elif tid in self.gil_wait_start:
|
||||
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
|
||||
current_time, CATEGORY_GIL)
|
||||
|
||||
# Track GC events - attribute to all threads that hold the GIL during GC
|
||||
# (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
|
||||
# If GIL switches during GC, multiple threads will get GC markers
|
||||
if gc_collecting and has_gil:
|
||||
# Start GC marker if not already started for this thread
|
||||
if tid not in self.gc_start_per_thread:
|
||||
self.gc_start_per_thread[tid] = current_time
|
||||
elif tid in self.gc_start_per_thread:
|
||||
# End GC marker if it was running for this thread
|
||||
# (either GC finished or thread lost GIL)
|
||||
self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid),
|
||||
current_time, CATEGORY_GC)
|
||||
|
||||
# Mark thread as initialized after processing all state transitions
|
||||
self.initialized_threads.add(tid)
|
||||
|
||||
# Categorize: idle if neither has GIL nor on CPU
|
||||
is_idle = not has_gil and not on_cpu
|
||||
|
||||
# Skip idle threads if skip_idle is enabled
|
||||
if self.skip_idle and is_idle:
|
||||
continue
|
||||
|
||||
if not frames:
|
||||
continue
|
||||
|
||||
# Process the stack
|
||||
stack_index = self._process_stack(thread_data, frames)
|
||||
|
||||
|
|
@ -102,7 +236,6 @@ def collect(self, stack_frames):
|
|||
|
||||
def _create_thread(self, tid):
|
||||
"""Create a new thread structure with processed profile format."""
|
||||
import threading
|
||||
|
||||
# Determine if this is the main thread
|
||||
try:
|
||||
|
|
@ -181,7 +314,7 @@ def _create_thread(self, tid):
|
|||
"functionSize": [],
|
||||
"length": 0,
|
||||
},
|
||||
# Markers - processed format
|
||||
# Markers - processed format (arrays)
|
||||
"markers": {
|
||||
"data": [],
|
||||
"name": [],
|
||||
|
|
@ -215,6 +348,27 @@ def _intern_string(self, s):
|
|||
self.global_string_map[s] = idx
|
||||
return idx
|
||||
|
||||
def _add_marker(self, tid, name, start_time, end_time, category):
|
||||
"""Add an interval marker for a specific thread."""
|
||||
if tid not in self.threads:
|
||||
return
|
||||
|
||||
thread_data = self.threads[tid]
|
||||
duration = end_time - start_time
|
||||
|
||||
name_idx = self._intern_string(name)
|
||||
markers = thread_data["markers"]
|
||||
markers["name"].append(name_idx)
|
||||
markers["startTime"].append(start_time)
|
||||
markers["endTime"].append(end_time)
|
||||
markers["phase"].append(1) # 1 = interval marker
|
||||
markers["category"].append(category)
|
||||
markers["data"].append({
|
||||
"type": name.replace(" ", ""),
|
||||
"duration": duration,
|
||||
"tid": tid
|
||||
})
|
||||
|
||||
def _process_stack(self, thread_data, frames):
|
||||
"""Process a stack and return the stack index."""
|
||||
if not frames:
|
||||
|
|
@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
|
|||
frame_cache[frame_key] = frame_idx
|
||||
return frame_idx
|
||||
|
||||
def _finalize_markers(self):
|
||||
"""Close any open markers at the end of profiling."""
|
||||
end_time = self.last_sample_time
|
||||
|
||||
# Close all open markers for each thread using a generic approach
|
||||
marker_states = [
|
||||
(self.has_gil_start, "Has GIL", CATEGORY_GIL),
|
||||
(self.no_gil_start, "No GIL", CATEGORY_GIL),
|
||||
(self.on_cpu_start, "On CPU", CATEGORY_CPU),
|
||||
(self.off_cpu_start, "Off CPU", CATEGORY_CPU),
|
||||
(self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
|
||||
(self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
|
||||
(self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
|
||||
(self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
|
||||
]
|
||||
|
||||
for state_dict, marker_name, category in marker_states:
|
||||
for tid in list(state_dict.keys()):
|
||||
self._add_marker(tid, marker_name, state_dict[tid], end_time, category)
|
||||
del state_dict[tid]
|
||||
|
||||
def export(self, filename):
|
||||
"""Export the profile to a Gecko JSON file."""
|
||||
|
||||
if self.sample_count > 0 and self.last_sample_time > 0:
|
||||
self.interval = self.last_sample_time / self.sample_count
|
||||
|
||||
profile = self._build_profile()
|
||||
# Spinner for progress indication
|
||||
spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'])
|
||||
stop_spinner = threading.Event()
|
||||
|
||||
with open(filename, "w") as f:
|
||||
json.dump(profile, f, separators=(",", ":"))
|
||||
def spin():
|
||||
message = 'Building Gecko profile...'
|
||||
while not stop_spinner.is_set():
|
||||
sys.stderr.write(f'\r{next(spinner)} {message}')
|
||||
sys.stderr.flush()
|
||||
time.sleep(0.1)
|
||||
# Clear the spinner line
|
||||
sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
|
||||
sys.stderr.flush()
|
||||
|
||||
spinner_thread = threading.Thread(target=spin, daemon=True)
|
||||
spinner_thread.start()
|
||||
|
||||
try:
|
||||
# Finalize any open markers before building profile
|
||||
self._finalize_markers()
|
||||
|
||||
profile = self._build_profile()
|
||||
|
||||
with open(filename, "w") as f:
|
||||
json.dump(profile, f, separators=(",", ":"))
|
||||
finally:
|
||||
stop_spinner.set()
|
||||
spinner_thread.join(timeout=1.0)
|
||||
# Small delay to ensure the clear happens
|
||||
time.sleep(0.01)
|
||||
|
||||
print(f"Gecko profile written to {filename}")
|
||||
print(
|
||||
|
|
@ -416,6 +618,7 @@ def _build_profile(self):
|
|||
frame_table["length"] = len(frame_table["func"])
|
||||
func_table["length"] = len(func_table["name"])
|
||||
resource_table["length"] = len(resource_table["name"])
|
||||
thread_data["markers"]["length"] = len(thread_data["markers"]["name"])
|
||||
|
||||
# Clean up internal caches
|
||||
del thread_data["_stackCache"]
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
PROFILING_MODE_WALL = 0
|
||||
PROFILING_MODE_CPU = 1
|
||||
PROFILING_MODE_GIL = 2
|
||||
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
|
||||
|
||||
|
||||
def _parse_mode(mode_string):
|
||||
|
|
@ -136,18 +137,20 @@ def _run_with_sync(original_cmd):
|
|||
|
||||
|
||||
class SampleProfiler:
|
||||
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL):
|
||||
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
|
||||
self.pid = pid
|
||||
self.sample_interval_usec = sample_interval_usec
|
||||
self.all_threads = all_threads
|
||||
if _FREE_THREADED_BUILD:
|
||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||
self.pid, all_threads=self.all_threads, mode=mode
|
||||
self.pid, all_threads=self.all_threads, mode=mode,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
)
|
||||
else:
|
||||
only_active_threads = bool(self.all_threads)
|
||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||
self.pid, only_active_thread=only_active_threads, mode=mode
|
||||
self.pid, only_active_thread=only_active_threads, mode=mode,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
)
|
||||
# Track sample intervals and total sample count
|
||||
self.sample_intervals = deque(maxlen=100)
|
||||
|
|
@ -614,14 +617,21 @@ def sample(
|
|||
realtime_stats=False,
|
||||
mode=PROFILING_MODE_WALL,
|
||||
):
|
||||
# PROFILING_MODE_ALL implies no skipping at all
|
||||
if mode == PROFILING_MODE_ALL:
|
||||
skip_non_matching_threads = False
|
||||
skip_idle = False
|
||||
else:
|
||||
# Determine skip settings based on output format and mode
|
||||
skip_non_matching_threads = output_format != "gecko"
|
||||
skip_idle = mode != PROFILING_MODE_WALL
|
||||
|
||||
profiler = SampleProfiler(
|
||||
pid, sample_interval_usec, all_threads=all_threads, mode=mode
|
||||
pid, sample_interval_usec, all_threads=all_threads, mode=mode,
|
||||
skip_non_matching_threads=skip_non_matching_threads
|
||||
)
|
||||
profiler.realtime_stats = realtime_stats
|
||||
|
||||
# Determine skip_idle for collector compatibility
|
||||
skip_idle = mode != PROFILING_MODE_WALL
|
||||
|
||||
collector = None
|
||||
match output_format:
|
||||
case "pstats":
|
||||
|
|
@ -633,7 +643,8 @@ def sample(
|
|||
collector = FlamegraphCollector(skip_idle=skip_idle)
|
||||
filename = filename or f"flamegraph.{pid}.html"
|
||||
case "gecko":
|
||||
collector = GeckoCollector(skip_idle=skip_idle)
|
||||
# Gecko format never skips idle threads to show full thread states
|
||||
collector = GeckoCollector(skip_idle=False)
|
||||
filename = filename or f"gecko.{pid}.json"
|
||||
case _:
|
||||
raise ValueError(f"Invalid output format: {output_format}")
|
||||
|
|
@ -882,6 +893,10 @@ def main():
|
|||
if args.format in ("collapsed", "gecko"):
|
||||
_validate_collapsed_format_args(args, parser)
|
||||
|
||||
# Validate that --mode is not used with --gecko
|
||||
if args.format == "gecko" and args.mode != "wall":
|
||||
parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).")
|
||||
|
||||
sort_value = args.sort if args.sort is not None else 2
|
||||
|
||||
if args.module is not None and not args.module:
|
||||
|
|
@ -900,7 +915,11 @@ def main():
|
|||
elif target_count > 1:
|
||||
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
|
||||
|
||||
mode = _parse_mode(args.mode)
|
||||
# Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice
|
||||
if args.format == "gecko":
|
||||
mode = PROFILING_MODE_ALL
|
||||
else:
|
||||
mode = _parse_mode(args.mode)
|
||||
|
||||
if args.pid:
|
||||
sample(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue