mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
gh-135953: Add GIL contention markers to sampling profiler Gecko format (#139485)
This commit enhances the Gecko format reporter in the sampling profiler to include markers for GIL acquisition events.
This commit is contained in:
parent
994ab5c922
commit
89a914c58d
9 changed files with 627 additions and 82 deletions
|
|
@ -113,6 +113,9 @@ struct _ts {
|
||||||
/* Currently holds the GIL. Must be its own field to avoid data races */
|
/* Currently holds the GIL. Must be its own field to avoid data races */
|
||||||
int holds_gil;
|
int holds_gil;
|
||||||
|
|
||||||
|
/* Currently requesting the GIL */
|
||||||
|
int gil_requested;
|
||||||
|
|
||||||
int _whence;
|
int _whence;
|
||||||
|
|
||||||
/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).
|
/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets {
|
||||||
uint64_t native_thread_id;
|
uint64_t native_thread_id;
|
||||||
uint64_t datastack_chunk;
|
uint64_t datastack_chunk;
|
||||||
uint64_t status;
|
uint64_t status;
|
||||||
|
uint64_t holds_gil;
|
||||||
|
uint64_t gil_requested;
|
||||||
} thread_state;
|
} thread_state;
|
||||||
|
|
||||||
// InterpreterFrame offset;
|
// InterpreterFrame offset;
|
||||||
|
|
@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets {
|
||||||
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
|
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
|
||||||
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
|
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
|
||||||
.status = offsetof(PyThreadState, _status), \
|
.status = offsetof(PyThreadState, _status), \
|
||||||
|
.holds_gil = offsetof(PyThreadState, holds_gil), \
|
||||||
|
.gil_requested = offsetof(PyThreadState, gil_requested), \
|
||||||
}, \
|
}, \
|
||||||
.interpreter_frame = { \
|
.interpreter_frame = { \
|
||||||
.size = sizeof(_PyInterpreterFrame), \
|
.size = sizeof(_PyInterpreterFrame), \
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,14 @@
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
# Enums are slow
|
# Thread status flags
|
||||||
THREAD_STATE_RUNNING = 0
|
try:
|
||||||
THREAD_STATE_IDLE = 1
|
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
|
||||||
THREAD_STATE_GIL_WAIT = 2
|
except ImportError:
|
||||||
THREAD_STATE_UNKNOWN = 3
|
# Fallback for tests or when module is not available
|
||||||
|
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||||
STATUS = {
|
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||||
THREAD_STATE_RUNNING: "running",
|
THREAD_STATUS_UNKNOWN = (1 << 2)
|
||||||
THREAD_STATE_IDLE: "idle",
|
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
|
||||||
THREAD_STATE_GIL_WAIT: "gil_wait",
|
|
||||||
THREAD_STATE_UNKNOWN: "unknown",
|
|
||||||
}
|
|
||||||
|
|
||||||
class Collector(ABC):
|
class Collector(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|
@ -26,8 +23,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
|
||||||
"""Iterate over all frame stacks from all interpreters and threads."""
|
"""Iterate over all frame stacks from all interpreters and threads."""
|
||||||
for interpreter_info in stack_frames:
|
for interpreter_info in stack_frames:
|
||||||
for thread_info in interpreter_info.threads:
|
for thread_info in interpreter_info.threads:
|
||||||
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
|
# skip_idle now means: skip if thread is not actively running
|
||||||
continue
|
# A thread is "active" if it has the GIL OR is on CPU
|
||||||
|
if skip_idle:
|
||||||
|
status_flags = thread_info.status
|
||||||
|
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
|
||||||
|
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
|
||||||
|
if not (has_gil or on_cpu):
|
||||||
|
continue
|
||||||
frames = thread_info.frame_info
|
frames = thread_info.frame_info
|
||||||
if frames:
|
if frames:
|
||||||
yield frames, thread_info.thread_id
|
yield frames, thread_info.thread_id
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,20 @@
|
||||||
|
import itertools
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .collector import Collector, THREAD_STATE_RUNNING
|
from .collector import Collector
|
||||||
|
try:
|
||||||
|
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
|
||||||
|
except ImportError:
|
||||||
|
# Fallback if module not available (shouldn't happen in normal use)
|
||||||
|
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||||
|
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||||
|
THREAD_STATUS_UNKNOWN = (1 << 2)
|
||||||
|
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
|
||||||
|
|
||||||
|
|
||||||
# Categories matching Firefox Profiler expectations
|
# Categories matching Firefox Profiler expectations
|
||||||
|
|
@ -11,14 +22,20 @@
|
||||||
{"name": "Other", "color": "grey", "subcategories": ["Other"]},
|
{"name": "Other", "color": "grey", "subcategories": ["Other"]},
|
||||||
{"name": "Python", "color": "yellow", "subcategories": ["Other"]},
|
{"name": "Python", "color": "yellow", "subcategories": ["Other"]},
|
||||||
{"name": "Native", "color": "blue", "subcategories": ["Other"]},
|
{"name": "Native", "color": "blue", "subcategories": ["Other"]},
|
||||||
{"name": "Idle", "color": "transparent", "subcategories": ["Other"]},
|
{"name": "GC", "color": "orange", "subcategories": ["Other"]},
|
||||||
|
{"name": "GIL", "color": "green", "subcategories": ["Other"]},
|
||||||
|
{"name": "CPU", "color": "purple", "subcategories": ["Other"]},
|
||||||
|
{"name": "Code Type", "color": "red", "subcategories": ["Other"]},
|
||||||
]
|
]
|
||||||
|
|
||||||
# Category indices
|
# Category indices
|
||||||
CATEGORY_OTHER = 0
|
CATEGORY_OTHER = 0
|
||||||
CATEGORY_PYTHON = 1
|
CATEGORY_PYTHON = 1
|
||||||
CATEGORY_NATIVE = 2
|
CATEGORY_NATIVE = 2
|
||||||
CATEGORY_IDLE = 3
|
CATEGORY_GC = 3
|
||||||
|
CATEGORY_GIL = 4
|
||||||
|
CATEGORY_CPU = 5
|
||||||
|
CATEGORY_CODE_TYPE = 6
|
||||||
|
|
||||||
# Subcategory indices
|
# Subcategory indices
|
||||||
DEFAULT_SUBCATEGORY = 0
|
DEFAULT_SUBCATEGORY = 0
|
||||||
|
|
@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
|
||||||
self.last_sample_time = 0
|
self.last_sample_time = 0
|
||||||
self.interval = 1.0 # Will be calculated from actual sampling
|
self.interval = 1.0 # Will be calculated from actual sampling
|
||||||
|
|
||||||
|
# State tracking for interval markers (tid -> start_time)
|
||||||
|
self.has_gil_start = {} # Thread has the GIL
|
||||||
|
self.no_gil_start = {} # Thread doesn't have the GIL
|
||||||
|
self.on_cpu_start = {} # Thread is running on CPU
|
||||||
|
self.off_cpu_start = {} # Thread is off CPU
|
||||||
|
self.python_code_start = {} # Thread running Python code (has GIL)
|
||||||
|
self.native_code_start = {} # Thread running native code (on CPU without GIL)
|
||||||
|
self.gil_wait_start = {} # Thread waiting for GIL
|
||||||
|
|
||||||
|
# GC event tracking: track GC start time per thread
|
||||||
|
self.gc_start_per_thread = {} # tid -> start_time
|
||||||
|
|
||||||
|
# Track which threads have been initialized for state tracking
|
||||||
|
self.initialized_threads = set()
|
||||||
|
|
||||||
|
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
|
||||||
|
active_name, inactive_name, category, current_time):
|
||||||
|
"""Track binary state transitions and emit markers.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tid: Thread ID
|
||||||
|
condition: Whether the active state is true
|
||||||
|
active_dict: Dict tracking start time of active state
|
||||||
|
inactive_dict: Dict tracking start time of inactive state
|
||||||
|
active_name: Name for active state marker
|
||||||
|
inactive_name: Name for inactive state marker
|
||||||
|
category: Gecko category for the markers
|
||||||
|
current_time: Current timestamp
|
||||||
|
"""
|
||||||
|
# On first observation of a thread, just record the current state
|
||||||
|
# without creating a marker (we don't know what the previous state was)
|
||||||
|
if tid not in self.initialized_threads:
|
||||||
|
if condition:
|
||||||
|
active_dict[tid] = current_time
|
||||||
|
else:
|
||||||
|
inactive_dict[tid] = current_time
|
||||||
|
return
|
||||||
|
|
||||||
|
# For already-initialized threads, track transitions
|
||||||
|
if condition:
|
||||||
|
active_dict.setdefault(tid, current_time)
|
||||||
|
if tid in inactive_dict:
|
||||||
|
self._add_marker(tid, inactive_name, inactive_dict.pop(tid),
|
||||||
|
current_time, category)
|
||||||
|
else:
|
||||||
|
inactive_dict.setdefault(tid, current_time)
|
||||||
|
if tid in active_dict:
|
||||||
|
self._add_marker(tid, active_name, active_dict.pop(tid),
|
||||||
|
current_time, category)
|
||||||
|
|
||||||
def collect(self, stack_frames):
|
def collect(self, stack_frames):
|
||||||
"""Collect a sample from stack frames."""
|
"""Collect a sample from stack frames."""
|
||||||
current_time = (time.time() * 1000) - self.start_time
|
current_time = (time.time() * 1000) - self.start_time
|
||||||
|
|
@ -69,19 +136,12 @@ def collect(self, stack_frames):
|
||||||
) / self.sample_count
|
) / self.sample_count
|
||||||
self.last_sample_time = current_time
|
self.last_sample_time = current_time
|
||||||
|
|
||||||
|
# Process threads and track GC per thread
|
||||||
for interpreter_info in stack_frames:
|
for interpreter_info in stack_frames:
|
||||||
for thread_info in interpreter_info.threads:
|
for thread_info in interpreter_info.threads:
|
||||||
if (
|
|
||||||
self.skip_idle
|
|
||||||
and thread_info.status != THREAD_STATE_RUNNING
|
|
||||||
):
|
|
||||||
continue
|
|
||||||
|
|
||||||
frames = thread_info.frame_info
|
frames = thread_info.frame_info
|
||||||
if not frames:
|
|
||||||
continue
|
|
||||||
|
|
||||||
tid = thread_info.thread_id
|
tid = thread_info.thread_id
|
||||||
|
gc_collecting = thread_info.gc_collecting
|
||||||
|
|
||||||
# Initialize thread if needed
|
# Initialize thread if needed
|
||||||
if tid not in self.threads:
|
if tid not in self.threads:
|
||||||
|
|
@ -89,6 +149,80 @@ def collect(self, stack_frames):
|
||||||
|
|
||||||
thread_data = self.threads[tid]
|
thread_data = self.threads[tid]
|
||||||
|
|
||||||
|
# Decode status flags
|
||||||
|
status_flags = thread_info.status
|
||||||
|
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
|
||||||
|
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
|
||||||
|
gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)
|
||||||
|
|
||||||
|
# Track GIL possession (Has GIL / No GIL)
|
||||||
|
self._track_state_transition(
|
||||||
|
tid, has_gil, self.has_gil_start, self.no_gil_start,
|
||||||
|
"Has GIL", "No GIL", CATEGORY_GIL, current_time
|
||||||
|
)
|
||||||
|
|
||||||
|
# Track CPU state (On CPU / Off CPU)
|
||||||
|
self._track_state_transition(
|
||||||
|
tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
|
||||||
|
"On CPU", "Off CPU", CATEGORY_CPU, current_time
|
||||||
|
)
|
||||||
|
|
||||||
|
# Track code type (Python Code / Native Code)
|
||||||
|
# This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
|
||||||
|
if has_gil:
|
||||||
|
self._track_state_transition(
|
||||||
|
tid, True, self.python_code_start, self.native_code_start,
|
||||||
|
"Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
|
||||||
|
)
|
||||||
|
elif on_cpu:
|
||||||
|
self._track_state_transition(
|
||||||
|
tid, True, self.native_code_start, self.python_code_start,
|
||||||
|
"Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Thread is idle (neither has GIL nor on CPU) - close any open code markers
|
||||||
|
# This handles the third state that _track_state_transition doesn't cover
|
||||||
|
if tid in self.initialized_threads:
|
||||||
|
if tid in self.python_code_start:
|
||||||
|
self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
|
||||||
|
current_time, CATEGORY_CODE_TYPE)
|
||||||
|
if tid in self.native_code_start:
|
||||||
|
self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
|
||||||
|
current_time, CATEGORY_CODE_TYPE)
|
||||||
|
|
||||||
|
# Track "Waiting for GIL" intervals (one-sided tracking)
|
||||||
|
if gil_requested:
|
||||||
|
self.gil_wait_start.setdefault(tid, current_time)
|
||||||
|
elif tid in self.gil_wait_start:
|
||||||
|
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
|
||||||
|
current_time, CATEGORY_GIL)
|
||||||
|
|
||||||
|
# Track GC events - attribute to all threads that hold the GIL during GC
|
||||||
|
# (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
|
||||||
|
# If GIL switches during GC, multiple threads will get GC markers
|
||||||
|
if gc_collecting and has_gil:
|
||||||
|
# Start GC marker if not already started for this thread
|
||||||
|
if tid not in self.gc_start_per_thread:
|
||||||
|
self.gc_start_per_thread[tid] = current_time
|
||||||
|
elif tid in self.gc_start_per_thread:
|
||||||
|
# End GC marker if it was running for this thread
|
||||||
|
# (either GC finished or thread lost GIL)
|
||||||
|
self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid),
|
||||||
|
current_time, CATEGORY_GC)
|
||||||
|
|
||||||
|
# Mark thread as initialized after processing all state transitions
|
||||||
|
self.initialized_threads.add(tid)
|
||||||
|
|
||||||
|
# Categorize: idle if neither has GIL nor on CPU
|
||||||
|
is_idle = not has_gil and not on_cpu
|
||||||
|
|
||||||
|
# Skip idle threads if skip_idle is enabled
|
||||||
|
if self.skip_idle and is_idle:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not frames:
|
||||||
|
continue
|
||||||
|
|
||||||
# Process the stack
|
# Process the stack
|
||||||
stack_index = self._process_stack(thread_data, frames)
|
stack_index = self._process_stack(thread_data, frames)
|
||||||
|
|
||||||
|
|
@ -102,7 +236,6 @@ def collect(self, stack_frames):
|
||||||
|
|
||||||
def _create_thread(self, tid):
|
def _create_thread(self, tid):
|
||||||
"""Create a new thread structure with processed profile format."""
|
"""Create a new thread structure with processed profile format."""
|
||||||
import threading
|
|
||||||
|
|
||||||
# Determine if this is the main thread
|
# Determine if this is the main thread
|
||||||
try:
|
try:
|
||||||
|
|
@ -181,7 +314,7 @@ def _create_thread(self, tid):
|
||||||
"functionSize": [],
|
"functionSize": [],
|
||||||
"length": 0,
|
"length": 0,
|
||||||
},
|
},
|
||||||
# Markers - processed format
|
# Markers - processed format (arrays)
|
||||||
"markers": {
|
"markers": {
|
||||||
"data": [],
|
"data": [],
|
||||||
"name": [],
|
"name": [],
|
||||||
|
|
@ -215,6 +348,27 @@ def _intern_string(self, s):
|
||||||
self.global_string_map[s] = idx
|
self.global_string_map[s] = idx
|
||||||
return idx
|
return idx
|
||||||
|
|
||||||
|
def _add_marker(self, tid, name, start_time, end_time, category):
|
||||||
|
"""Add an interval marker for a specific thread."""
|
||||||
|
if tid not in self.threads:
|
||||||
|
return
|
||||||
|
|
||||||
|
thread_data = self.threads[tid]
|
||||||
|
duration = end_time - start_time
|
||||||
|
|
||||||
|
name_idx = self._intern_string(name)
|
||||||
|
markers = thread_data["markers"]
|
||||||
|
markers["name"].append(name_idx)
|
||||||
|
markers["startTime"].append(start_time)
|
||||||
|
markers["endTime"].append(end_time)
|
||||||
|
markers["phase"].append(1) # 1 = interval marker
|
||||||
|
markers["category"].append(category)
|
||||||
|
markers["data"].append({
|
||||||
|
"type": name.replace(" ", ""),
|
||||||
|
"duration": duration,
|
||||||
|
"tid": tid
|
||||||
|
})
|
||||||
|
|
||||||
def _process_stack(self, thread_data, frames):
|
def _process_stack(self, thread_data, frames):
|
||||||
"""Process a stack and return the stack index."""
|
"""Process a stack and return the stack index."""
|
||||||
if not frames:
|
if not frames:
|
||||||
|
|
@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
|
||||||
frame_cache[frame_key] = frame_idx
|
frame_cache[frame_key] = frame_idx
|
||||||
return frame_idx
|
return frame_idx
|
||||||
|
|
||||||
|
def _finalize_markers(self):
|
||||||
|
"""Close any open markers at the end of profiling."""
|
||||||
|
end_time = self.last_sample_time
|
||||||
|
|
||||||
|
# Close all open markers for each thread using a generic approach
|
||||||
|
marker_states = [
|
||||||
|
(self.has_gil_start, "Has GIL", CATEGORY_GIL),
|
||||||
|
(self.no_gil_start, "No GIL", CATEGORY_GIL),
|
||||||
|
(self.on_cpu_start, "On CPU", CATEGORY_CPU),
|
||||||
|
(self.off_cpu_start, "Off CPU", CATEGORY_CPU),
|
||||||
|
(self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
|
||||||
|
(self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
|
||||||
|
(self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
|
||||||
|
(self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
|
||||||
|
]
|
||||||
|
|
||||||
|
for state_dict, marker_name, category in marker_states:
|
||||||
|
for tid in list(state_dict.keys()):
|
||||||
|
self._add_marker(tid, marker_name, state_dict[tid], end_time, category)
|
||||||
|
del state_dict[tid]
|
||||||
|
|
||||||
def export(self, filename):
|
def export(self, filename):
|
||||||
"""Export the profile to a Gecko JSON file."""
|
"""Export the profile to a Gecko JSON file."""
|
||||||
|
|
||||||
if self.sample_count > 0 and self.last_sample_time > 0:
|
if self.sample_count > 0 and self.last_sample_time > 0:
|
||||||
self.interval = self.last_sample_time / self.sample_count
|
self.interval = self.last_sample_time / self.sample_count
|
||||||
|
|
||||||
profile = self._build_profile()
|
# Spinner for progress indication
|
||||||
|
spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'])
|
||||||
|
stop_spinner = threading.Event()
|
||||||
|
|
||||||
with open(filename, "w") as f:
|
def spin():
|
||||||
json.dump(profile, f, separators=(",", ":"))
|
message = 'Building Gecko profile...'
|
||||||
|
while not stop_spinner.is_set():
|
||||||
|
sys.stderr.write(f'\r{next(spinner)} {message}')
|
||||||
|
sys.stderr.flush()
|
||||||
|
time.sleep(0.1)
|
||||||
|
# Clear the spinner line
|
||||||
|
sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
|
||||||
|
sys.stderr.flush()
|
||||||
|
|
||||||
|
spinner_thread = threading.Thread(target=spin, daemon=True)
|
||||||
|
spinner_thread.start()
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Finalize any open markers before building profile
|
||||||
|
self._finalize_markers()
|
||||||
|
|
||||||
|
profile = self._build_profile()
|
||||||
|
|
||||||
|
with open(filename, "w") as f:
|
||||||
|
json.dump(profile, f, separators=(",", ":"))
|
||||||
|
finally:
|
||||||
|
stop_spinner.set()
|
||||||
|
spinner_thread.join(timeout=1.0)
|
||||||
|
# Small delay to ensure the clear happens
|
||||||
|
time.sleep(0.01)
|
||||||
|
|
||||||
print(f"Gecko profile written to {filename}")
|
print(f"Gecko profile written to {filename}")
|
||||||
print(
|
print(
|
||||||
|
|
@ -416,6 +618,7 @@ def _build_profile(self):
|
||||||
frame_table["length"] = len(frame_table["func"])
|
frame_table["length"] = len(frame_table["func"])
|
||||||
func_table["length"] = len(func_table["name"])
|
func_table["length"] = len(func_table["name"])
|
||||||
resource_table["length"] = len(resource_table["name"])
|
resource_table["length"] = len(resource_table["name"])
|
||||||
|
thread_data["markers"]["length"] = len(thread_data["markers"]["name"])
|
||||||
|
|
||||||
# Clean up internal caches
|
# Clean up internal caches
|
||||||
del thread_data["_stackCache"]
|
del thread_data["_stackCache"]
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@
|
||||||
PROFILING_MODE_WALL = 0
|
PROFILING_MODE_WALL = 0
|
||||||
PROFILING_MODE_CPU = 1
|
PROFILING_MODE_CPU = 1
|
||||||
PROFILING_MODE_GIL = 2
|
PROFILING_MODE_GIL = 2
|
||||||
|
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
|
||||||
|
|
||||||
|
|
||||||
def _parse_mode(mode_string):
|
def _parse_mode(mode_string):
|
||||||
|
|
@ -136,18 +137,20 @@ def _run_with_sync(original_cmd):
|
||||||
|
|
||||||
|
|
||||||
class SampleProfiler:
|
class SampleProfiler:
|
||||||
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL):
|
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
|
||||||
self.pid = pid
|
self.pid = pid
|
||||||
self.sample_interval_usec = sample_interval_usec
|
self.sample_interval_usec = sample_interval_usec
|
||||||
self.all_threads = all_threads
|
self.all_threads = all_threads
|
||||||
if _FREE_THREADED_BUILD:
|
if _FREE_THREADED_BUILD:
|
||||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||||
self.pid, all_threads=self.all_threads, mode=mode
|
self.pid, all_threads=self.all_threads, mode=mode,
|
||||||
|
skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
only_active_threads = bool(self.all_threads)
|
only_active_threads = bool(self.all_threads)
|
||||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||||
self.pid, only_active_thread=only_active_threads, mode=mode
|
self.pid, only_active_thread=only_active_threads, mode=mode,
|
||||||
|
skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
# Track sample intervals and total sample count
|
# Track sample intervals and total sample count
|
||||||
self.sample_intervals = deque(maxlen=100)
|
self.sample_intervals = deque(maxlen=100)
|
||||||
|
|
@ -614,14 +617,21 @@ def sample(
|
||||||
realtime_stats=False,
|
realtime_stats=False,
|
||||||
mode=PROFILING_MODE_WALL,
|
mode=PROFILING_MODE_WALL,
|
||||||
):
|
):
|
||||||
|
# PROFILING_MODE_ALL implies no skipping at all
|
||||||
|
if mode == PROFILING_MODE_ALL:
|
||||||
|
skip_non_matching_threads = False
|
||||||
|
skip_idle = False
|
||||||
|
else:
|
||||||
|
# Determine skip settings based on output format and mode
|
||||||
|
skip_non_matching_threads = output_format != "gecko"
|
||||||
|
skip_idle = mode != PROFILING_MODE_WALL
|
||||||
|
|
||||||
profiler = SampleProfiler(
|
profiler = SampleProfiler(
|
||||||
pid, sample_interval_usec, all_threads=all_threads, mode=mode
|
pid, sample_interval_usec, all_threads=all_threads, mode=mode,
|
||||||
|
skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
profiler.realtime_stats = realtime_stats
|
profiler.realtime_stats = realtime_stats
|
||||||
|
|
||||||
# Determine skip_idle for collector compatibility
|
|
||||||
skip_idle = mode != PROFILING_MODE_WALL
|
|
||||||
|
|
||||||
collector = None
|
collector = None
|
||||||
match output_format:
|
match output_format:
|
||||||
case "pstats":
|
case "pstats":
|
||||||
|
|
@ -633,7 +643,8 @@ def sample(
|
||||||
collector = FlamegraphCollector(skip_idle=skip_idle)
|
collector = FlamegraphCollector(skip_idle=skip_idle)
|
||||||
filename = filename or f"flamegraph.{pid}.html"
|
filename = filename or f"flamegraph.{pid}.html"
|
||||||
case "gecko":
|
case "gecko":
|
||||||
collector = GeckoCollector(skip_idle=skip_idle)
|
# Gecko format never skips idle threads to show full thread states
|
||||||
|
collector = GeckoCollector(skip_idle=False)
|
||||||
filename = filename or f"gecko.{pid}.json"
|
filename = filename or f"gecko.{pid}.json"
|
||||||
case _:
|
case _:
|
||||||
raise ValueError(f"Invalid output format: {output_format}")
|
raise ValueError(f"Invalid output format: {output_format}")
|
||||||
|
|
@ -882,6 +893,10 @@ def main():
|
||||||
if args.format in ("collapsed", "gecko"):
|
if args.format in ("collapsed", "gecko"):
|
||||||
_validate_collapsed_format_args(args, parser)
|
_validate_collapsed_format_args(args, parser)
|
||||||
|
|
||||||
|
# Validate that --mode is not used with --gecko
|
||||||
|
if args.format == "gecko" and args.mode != "wall":
|
||||||
|
parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).")
|
||||||
|
|
||||||
sort_value = args.sort if args.sort is not None else 2
|
sort_value = args.sort if args.sort is not None else 2
|
||||||
|
|
||||||
if args.module is not None and not args.module:
|
if args.module is not None and not args.module:
|
||||||
|
|
@ -900,7 +915,11 @@ def main():
|
||||||
elif target_count > 1:
|
elif target_count > 1:
|
||||||
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
|
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
|
||||||
|
|
||||||
mode = _parse_mode(args.mode)
|
# Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice
|
||||||
|
if args.format == "gecko":
|
||||||
|
mode = PROFILING_MODE_ALL
|
||||||
|
else:
|
||||||
|
mode = _parse_mode(args.mode)
|
||||||
|
|
||||||
if args.pid:
|
if args.pid:
|
||||||
sample(
|
sample(
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,12 @@
|
||||||
PROFILING_MODE_WALL = 0
|
PROFILING_MODE_WALL = 0
|
||||||
PROFILING_MODE_CPU = 1
|
PROFILING_MODE_CPU = 1
|
||||||
PROFILING_MODE_GIL = 2
|
PROFILING_MODE_GIL = 2
|
||||||
|
PROFILING_MODE_ALL = 3
|
||||||
|
|
||||||
|
# Thread status flags
|
||||||
|
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||||
|
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||||
|
THREAD_STATUS_UNKNOWN = (1 << 2)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from concurrent import interpreters
|
from concurrent import interpreters
|
||||||
|
|
@ -1763,11 +1769,14 @@ def busy():
|
||||||
for thread_info in interpreter_info.threads:
|
for thread_info in interpreter_info.threads:
|
||||||
statuses[thread_info.thread_id] = thread_info.status
|
statuses[thread_info.thread_id] = thread_info.status
|
||||||
|
|
||||||
# Check if sleeper thread is idle and busy thread is running
|
# Check if sleeper thread is off CPU and busy thread is on CPU
|
||||||
|
# In the new flags system:
|
||||||
|
# - sleeper should NOT have ON_CPU flag (off CPU)
|
||||||
|
# - busy should have ON_CPU flag
|
||||||
if (sleeper_tid in statuses and
|
if (sleeper_tid in statuses and
|
||||||
busy_tid in statuses and
|
busy_tid in statuses and
|
||||||
statuses[sleeper_tid] == 1 and
|
not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and
|
||||||
statuses[busy_tid] == 0):
|
(statuses[busy_tid] & THREAD_STATUS_ON_CPU)):
|
||||||
break
|
break
|
||||||
time.sleep(0.5) # Give a bit of time to let threads settle
|
time.sleep(0.5) # Give a bit of time to let threads settle
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
|
|
@ -1779,8 +1788,8 @@ def busy():
|
||||||
self.assertIsNotNone(busy_tid, "Busy thread id not received")
|
self.assertIsNotNone(busy_tid, "Busy thread id not received")
|
||||||
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
|
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
|
||||||
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
|
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
|
||||||
self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)")
|
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper thread should be off CPU")
|
||||||
self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
|
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy thread should be on CPU")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if client_socket is not None:
|
if client_socket is not None:
|
||||||
|
|
@ -1875,11 +1884,14 @@ def busy():
|
||||||
for thread_info in interpreter_info.threads:
|
for thread_info in interpreter_info.threads:
|
||||||
statuses[thread_info.thread_id] = thread_info.status
|
statuses[thread_info.thread_id] = thread_info.status
|
||||||
|
|
||||||
# Check if sleeper thread is idle (status 2 for GIL mode) and busy thread is running
|
# Check if sleeper thread doesn't have GIL and busy thread has GIL
|
||||||
|
# In the new flags system:
|
||||||
|
# - sleeper should NOT have HAS_GIL flag (waiting for GIL)
|
||||||
|
# - busy should have HAS_GIL flag
|
||||||
if (sleeper_tid in statuses and
|
if (sleeper_tid in statuses and
|
||||||
busy_tid in statuses and
|
busy_tid in statuses and
|
||||||
statuses[sleeper_tid] == 2 and
|
not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and
|
||||||
statuses[busy_tid] == 0):
|
(statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
|
||||||
break
|
break
|
||||||
time.sleep(0.5) # Give a bit of time to let threads settle
|
time.sleep(0.5) # Give a bit of time to let threads settle
|
||||||
except PermissionError:
|
except PermissionError:
|
||||||
|
|
@ -1891,8 +1903,8 @@ def busy():
|
||||||
self.assertIsNotNone(busy_tid, "Busy thread id not received")
|
self.assertIsNotNone(busy_tid, "Busy thread id not received")
|
||||||
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
|
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
|
||||||
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
|
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
|
||||||
self.assertEqual(statuses[sleeper_tid], 2, "Sleeper thread should be idle (1)")
|
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL")
|
||||||
self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
|
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy thread should have GIL")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
if client_socket is not None:
|
if client_socket is not None:
|
||||||
|
|
@ -1900,6 +1912,128 @@ def busy():
|
||||||
p.terminate()
|
p.terminate()
|
||||||
p.wait(timeout=SHORT_TIMEOUT)
|
p.wait(timeout=SHORT_TIMEOUT)
|
||||||
|
|
||||||
|
@unittest.skipIf(
|
||||||
|
sys.platform not in ("linux", "darwin", "win32"),
|
||||||
|
"Test only runs on supported platforms (Linux, macOS, or Windows)",
|
||||||
|
)
|
||||||
|
@unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception")
|
||||||
|
def test_thread_status_all_mode_detection(self):
|
||||||
|
port = find_unused_port()
|
||||||
|
script = textwrap.dedent(
|
||||||
|
f"""\
|
||||||
|
import socket
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def sleeper_thread():
|
||||||
|
conn = socket.create_connection(("localhost", {port}))
|
||||||
|
conn.sendall(b"sleeper:" + str(threading.get_native_id()).encode())
|
||||||
|
while True:
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
def busy_thread():
|
||||||
|
conn = socket.create_connection(("localhost", {port}))
|
||||||
|
conn.sendall(b"busy:" + str(threading.get_native_id()).encode())
|
||||||
|
while True:
|
||||||
|
sum(range(100000))
|
||||||
|
|
||||||
|
t1 = threading.Thread(target=sleeper_thread)
|
||||||
|
t2 = threading.Thread(target=busy_thread)
|
||||||
|
t1.start()
|
||||||
|
t2.start()
|
||||||
|
t1.join()
|
||||||
|
t2.join()
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
with os_helper.temp_dir() as tmp_dir:
|
||||||
|
script_file = make_script(tmp_dir, "script", script)
|
||||||
|
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||||
|
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
|
server_socket.bind(("localhost", port))
|
||||||
|
server_socket.listen(2)
|
||||||
|
server_socket.settimeout(SHORT_TIMEOUT)
|
||||||
|
|
||||||
|
p = subprocess.Popen(
|
||||||
|
[sys.executable, script_file],
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
)
|
||||||
|
|
||||||
|
client_sockets = []
|
||||||
|
try:
|
||||||
|
sleeper_tid = None
|
||||||
|
busy_tid = None
|
||||||
|
|
||||||
|
# Receive thread IDs from the child process
|
||||||
|
for _ in range(2):
|
||||||
|
client_socket, _ = server_socket.accept()
|
||||||
|
client_sockets.append(client_socket)
|
||||||
|
line = client_socket.recv(1024)
|
||||||
|
if line:
|
||||||
|
if line.startswith(b"sleeper:"):
|
||||||
|
try:
|
||||||
|
sleeper_tid = int(line.split(b":")[-1])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
elif line.startswith(b"busy:"):
|
||||||
|
try:
|
||||||
|
busy_tid = int(line.split(b":")[-1])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
server_socket.close()
|
||||||
|
|
||||||
|
attempts = 10
|
||||||
|
statuses = {}
|
||||||
|
try:
|
||||||
|
unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_ALL,
|
||||||
|
skip_non_matching_threads=False)
|
||||||
|
for _ in range(attempts):
|
||||||
|
traces = unwinder.get_stack_trace()
|
||||||
|
# Find threads and their statuses
|
||||||
|
statuses = {}
|
||||||
|
for interpreter_info in traces:
|
||||||
|
for thread_info in interpreter_info.threads:
|
||||||
|
statuses[thread_info.thread_id] = thread_info.status
|
||||||
|
|
||||||
|
# Check ALL mode provides both GIL and CPU info
|
||||||
|
# - sleeper should NOT have ON_CPU and NOT have HAS_GIL
|
||||||
|
# - busy should have ON_CPU and have HAS_GIL
|
||||||
|
if (sleeper_tid in statuses and
|
||||||
|
busy_tid in statuses and
|
||||||
|
not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and
|
||||||
|
not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and
|
||||||
|
(statuses[busy_tid] & THREAD_STATUS_ON_CPU) and
|
||||||
|
(statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
|
||||||
|
break
|
||||||
|
time.sleep(0.5)
|
||||||
|
except PermissionError:
|
||||||
|
self.skipTest(
|
||||||
|
"Insufficient permissions to read the stack trace"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
|
||||||
|
self.assertIsNotNone(busy_tid, "Busy thread id not received")
|
||||||
|
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
|
||||||
|
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
|
||||||
|
|
||||||
|
# Sleeper thread: off CPU, no GIL
|
||||||
|
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper should be off CPU")
|
||||||
|
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL")
|
||||||
|
|
||||||
|
# Busy thread: on CPU, has GIL
|
||||||
|
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy should be on CPU")
|
||||||
|
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy should have GIL")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
for client_socket in client_sockets:
|
||||||
|
client_socket.close()
|
||||||
|
p.terminate()
|
||||||
|
p.wait(timeout=SHORT_TIMEOUT)
|
||||||
|
p.stdout.close()
|
||||||
|
p.stderr.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -63,12 +63,14 @@ def __repr__(self):
|
||||||
class MockThreadInfo:
|
class MockThreadInfo:
|
||||||
"""Mock ThreadInfo for testing since the real one isn't accessible."""
|
"""Mock ThreadInfo for testing since the real one isn't accessible."""
|
||||||
|
|
||||||
def __init__(self, thread_id, frame_info):
|
def __init__(self, thread_id, frame_info, status=0, gc_collecting=False): # Default to THREAD_STATE_RUNNING (0)
|
||||||
self.thread_id = thread_id
|
self.thread_id = thread_id
|
||||||
self.frame_info = frame_info
|
self.frame_info = frame_info
|
||||||
|
self.status = status
|
||||||
|
self.gc_collecting = gc_collecting
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info})"
|
return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info}, status={self.status}, gc_collecting={self.gc_collecting})"
|
||||||
|
|
||||||
|
|
||||||
class MockInterpreterInfo:
|
class MockInterpreterInfo:
|
||||||
|
|
@ -674,6 +676,97 @@ def test_gecko_collector_export(self):
|
||||||
self.assertIn("func2", string_array)
|
self.assertIn("func2", string_array)
|
||||||
self.assertIn("other_func", string_array)
|
self.assertIn("other_func", string_array)
|
||||||
|
|
||||||
|
def test_gecko_collector_markers(self):
|
||||||
|
"""Test Gecko profile markers for GIL and CPU state tracking."""
|
||||||
|
try:
|
||||||
|
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_GIL_REQUESTED
|
||||||
|
except ImportError:
|
||||||
|
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||||
|
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||||
|
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
|
||||||
|
|
||||||
|
collector = GeckoCollector()
|
||||||
|
|
||||||
|
# Status combinations for different thread states
|
||||||
|
HAS_GIL_ON_CPU = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Running Python code
|
||||||
|
NO_GIL_ON_CPU = THREAD_STATUS_ON_CPU # Running native code
|
||||||
|
WAITING_FOR_GIL = THREAD_STATUS_GIL_REQUESTED # Waiting for GIL
|
||||||
|
|
||||||
|
# Simulate thread state transitions
|
||||||
|
collector.collect([
|
||||||
|
MockInterpreterInfo(0, [
|
||||||
|
MockThreadInfo(1, [("test.py", 10, "python_func")], status=HAS_GIL_ON_CPU)
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
collector.collect([
|
||||||
|
MockInterpreterInfo(0, [
|
||||||
|
MockThreadInfo(1, [("test.py", 15, "wait_func")], status=WAITING_FOR_GIL)
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
collector.collect([
|
||||||
|
MockInterpreterInfo(0, [
|
||||||
|
MockThreadInfo(1, [("test.py", 20, "python_func2")], status=HAS_GIL_ON_CPU)
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
collector.collect([
|
||||||
|
MockInterpreterInfo(0, [
|
||||||
|
MockThreadInfo(1, [("native.c", 100, "native_func")], status=NO_GIL_ON_CPU)
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
profile_data = collector._build_profile()
|
||||||
|
|
||||||
|
# Verify we have threads with markers
|
||||||
|
self.assertIn("threads", profile_data)
|
||||||
|
self.assertEqual(len(profile_data["threads"]), 1)
|
||||||
|
thread_data = profile_data["threads"][0]
|
||||||
|
|
||||||
|
# Check markers exist
|
||||||
|
self.assertIn("markers", thread_data)
|
||||||
|
markers = thread_data["markers"]
|
||||||
|
|
||||||
|
# Should have marker arrays
|
||||||
|
self.assertIn("name", markers)
|
||||||
|
self.assertIn("startTime", markers)
|
||||||
|
self.assertIn("endTime", markers)
|
||||||
|
self.assertIn("category", markers)
|
||||||
|
self.assertGreater(markers["length"], 0, "Should have generated markers")
|
||||||
|
|
||||||
|
# Get marker names from string table
|
||||||
|
string_array = profile_data["shared"]["stringArray"]
|
||||||
|
marker_names = [string_array[idx] for idx in markers["name"]]
|
||||||
|
|
||||||
|
# Verify we have different marker types
|
||||||
|
marker_name_set = set(marker_names)
|
||||||
|
|
||||||
|
# Should have "Has GIL" markers (when thread had GIL)
|
||||||
|
self.assertIn("Has GIL", marker_name_set, "Should have 'Has GIL' markers")
|
||||||
|
|
||||||
|
# Should have "No GIL" markers (when thread didn't have GIL)
|
||||||
|
self.assertIn("No GIL", marker_name_set, "Should have 'No GIL' markers")
|
||||||
|
|
||||||
|
# Should have "On CPU" markers (when thread was on CPU)
|
||||||
|
self.assertIn("On CPU", marker_name_set, "Should have 'On CPU' markers")
|
||||||
|
|
||||||
|
# Should have "Waiting for GIL" markers (when thread was waiting)
|
||||||
|
self.assertIn("Waiting for GIL", marker_name_set, "Should have 'Waiting for GIL' markers")
|
||||||
|
|
||||||
|
# Verify marker structure
|
||||||
|
for i in range(markers["length"]):
|
||||||
|
# All markers should be interval markers (phase = 1)
|
||||||
|
self.assertEqual(markers["phase"][i], 1, f"Marker {i} should be interval marker")
|
||||||
|
|
||||||
|
# All markers should have valid time range
|
||||||
|
start_time = markers["startTime"][i]
|
||||||
|
end_time = markers["endTime"][i]
|
||||||
|
self.assertLessEqual(start_time, end_time, f"Marker {i} should have valid time range")
|
||||||
|
|
||||||
|
# All markers should have valid category
|
||||||
|
self.assertGreaterEqual(markers["category"][i], 0, f"Marker {i} should have valid category")
|
||||||
|
|
||||||
def test_pstats_collector_export(self):
|
def test_pstats_collector_export(self):
|
||||||
collector = PstatsCollector(
|
collector = PstatsCollector(
|
||||||
sample_interval_usec=1000000
|
sample_interval_usec=1000000
|
||||||
|
|
@ -2625,19 +2718,30 @@ def test_mode_validation(self):
|
||||||
|
|
||||||
def test_frames_filtered_with_skip_idle(self):
|
def test_frames_filtered_with_skip_idle(self):
|
||||||
"""Test that frames are actually filtered when skip_idle=True."""
|
"""Test that frames are actually filtered when skip_idle=True."""
|
||||||
|
# Import thread status flags
|
||||||
|
try:
|
||||||
|
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU
|
||||||
|
except ImportError:
|
||||||
|
THREAD_STATUS_HAS_GIL = (1 << 0)
|
||||||
|
THREAD_STATUS_ON_CPU = (1 << 1)
|
||||||
|
|
||||||
# Create mock frames with different thread statuses
|
# Create mock frames with different thread statuses
|
||||||
class MockThreadInfoWithStatus:
|
class MockThreadInfoWithStatus:
|
||||||
def __init__(self, thread_id, frame_info, status):
|
def __init__(self, thread_id, frame_info, status):
|
||||||
self.thread_id = thread_id
|
self.thread_id = thread_id
|
||||||
self.frame_info = frame_info
|
self.frame_info = frame_info
|
||||||
self.status = status
|
self.status = status
|
||||||
|
self.gc_collecting = False
|
||||||
|
|
||||||
|
# Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
|
||||||
|
ACTIVE_STATUS = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Has GIL and on CPU
|
||||||
|
IDLE_STATUS = 0 # Neither has GIL nor on CPU
|
||||||
|
|
||||||
# Create test data: running thread, idle thread, and another running thread
|
|
||||||
test_frames = [
|
test_frames = [
|
||||||
MockInterpreterInfo(0, [
|
MockInterpreterInfo(0, [
|
||||||
MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], 0), # RUNNING
|
MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], ACTIVE_STATUS),
|
||||||
MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], 1), # IDLE
|
MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], IDLE_STATUS),
|
||||||
MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], 0), # RUNNING
|
MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], ACTIVE_STATUS),
|
||||||
])
|
])
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@
|
||||||
* HEADERS AND INCLUDES
|
* HEADERS AND INCLUDES
|
||||||
* ============================================================================ */
|
* ============================================================================ */
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
@ -81,6 +82,8 @@ typedef enum _WIN32_THREADSTATE {
|
||||||
#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject)
|
#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject)
|
||||||
#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject)
|
#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject)
|
||||||
#define SIZEOF_LONG_OBJ sizeof(PyLongObject)
|
#define SIZEOF_LONG_OBJ sizeof(PyLongObject)
|
||||||
|
#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
|
||||||
|
#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)
|
||||||
|
|
||||||
// Calculate the minimum buffer size needed to read interpreter state fields
|
// Calculate the minimum buffer size needed to read interpreter state fields
|
||||||
// We need to read code_object_generation and potentially tlbc_generation
|
// We need to read code_object_generation and potentially tlbc_generation
|
||||||
|
|
@ -178,8 +181,9 @@ static PyStructSequence_Desc CoroInfo_desc = {
|
||||||
// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info)
|
// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info)
|
||||||
static PyStructSequence_Field ThreadInfo_fields[] = {
|
static PyStructSequence_Field ThreadInfo_fields[] = {
|
||||||
{"thread_id", "Thread ID"},
|
{"thread_id", "Thread ID"},
|
||||||
{"status", "Thread status"},
|
{"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"},
|
||||||
{"frame_info", "Frame information"},
|
{"frame_info", "Frame information"},
|
||||||
|
{"gc_collecting", "Whether GC is collecting (interpreter-level)"},
|
||||||
{NULL}
|
{NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -187,7 +191,7 @@ static PyStructSequence_Desc ThreadInfo_desc = {
|
||||||
"_remote_debugging.ThreadInfo",
|
"_remote_debugging.ThreadInfo",
|
||||||
"Information about a thread",
|
"Information about a thread",
|
||||||
ThreadInfo_fields,
|
ThreadInfo_fields,
|
||||||
2
|
3
|
||||||
};
|
};
|
||||||
|
|
||||||
// InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list)
|
// InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list)
|
||||||
|
|
@ -247,9 +251,16 @@ enum _ThreadState {
|
||||||
enum _ProfilingMode {
|
enum _ProfilingMode {
|
||||||
PROFILING_MODE_WALL = 0,
|
PROFILING_MODE_WALL = 0,
|
||||||
PROFILING_MODE_CPU = 1,
|
PROFILING_MODE_CPU = 1,
|
||||||
PROFILING_MODE_GIL = 2
|
PROFILING_MODE_GIL = 2,
|
||||||
|
PROFILING_MODE_ALL = 3 // Combines GIL + CPU checks
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Thread status flags (can be combined)
|
||||||
|
#define THREAD_STATUS_HAS_GIL (1 << 0) // Thread has the GIL
|
||||||
|
#define THREAD_STATUS_ON_CPU (1 << 1) // Thread is running on CPU
|
||||||
|
#define THREAD_STATUS_UNKNOWN (1 << 2) // Status could not be determined
|
||||||
|
#define THREAD_STATUS_GIL_REQUESTED (1 << 3) // Thread is waiting for the GIL
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
proc_handle_t handle;
|
proc_handle_t handle;
|
||||||
|
|
@ -2650,34 +2661,70 @@ unwind_stack_for_thread(
|
||||||
|
|
||||||
long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
|
long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
|
||||||
|
|
||||||
// Calculate thread status based on mode
|
// Read GC collecting state from the interpreter (before any skip checks)
|
||||||
int status = THREAD_STATE_UNKNOWN;
|
uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp);
|
||||||
if (unwinder->mode == PROFILING_MODE_CPU) {
|
|
||||||
long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
|
// Read the GC runtime state from the interpreter state
|
||||||
status = get_thread_status(unwinder, tid, pthread_id);
|
uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc;
|
||||||
if (status == -1) {
|
char gc_state[SIZEOF_GC_RUNTIME_STATE];
|
||||||
PyErr_Print();
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) {
|
||||||
PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status");
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
} else if (unwinder->mode == PROFILING_MODE_GIL) {
|
|
||||||
|
int gc_collecting = GET_MEMBER(int, gc_state, unwinder->debug_offsets.gc.collecting);
|
||||||
|
|
||||||
|
// Calculate thread status using flags (always)
|
||||||
|
int status_flags = 0;
|
||||||
|
|
||||||
|
// Check GIL status
|
||||||
|
int has_gil = 0;
|
||||||
|
int gil_requested = 0;
|
||||||
#ifdef Py_GIL_DISABLED
|
#ifdef Py_GIL_DISABLED
|
||||||
// All threads are considered running in free threading builds if they have a thread state attached
|
int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
|
||||||
int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
|
has_gil = active;
|
||||||
status = active ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
|
|
||||||
#else
|
#else
|
||||||
status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
|
// Read holds_gil directly from thread state
|
||||||
|
has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil);
|
||||||
|
|
||||||
|
// Check if thread is actively requesting the GIL
|
||||||
|
if (unwinder->debug_offsets.thread_state.gil_requested != 0) {
|
||||||
|
gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set GIL_REQUESTED flag if thread is waiting
|
||||||
|
if (!has_gil && gil_requested) {
|
||||||
|
status_flags |= THREAD_STATUS_GIL_REQUESTED;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
} else {
|
if (has_gil) {
|
||||||
// PROFILING_MODE_WALL - all threads are considered running
|
status_flags |= THREAD_STATUS_HAS_GIL;
|
||||||
status = THREAD_STATE_RUNNING;
|
}
|
||||||
|
|
||||||
|
// Assert that we never have both HAS_GIL and GIL_REQUESTED set at the same time
|
||||||
|
// This would indicate a race condition in the GIL state tracking
|
||||||
|
assert(!(has_gil && gil_requested));
|
||||||
|
|
||||||
|
// Check CPU status
|
||||||
|
long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
|
||||||
|
int cpu_status = get_thread_status(unwinder, tid, pthread_id);
|
||||||
|
if (cpu_status == -1) {
|
||||||
|
status_flags |= THREAD_STATUS_UNKNOWN;
|
||||||
|
} else if (cpu_status == THREAD_STATE_RUNNING) {
|
||||||
|
status_flags |= THREAD_STATUS_ON_CPU;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if we should skip this thread based on mode
|
// Check if we should skip this thread based on mode
|
||||||
int should_skip = 0;
|
int should_skip = 0;
|
||||||
if (unwinder->skip_non_matching_threads && status != THREAD_STATE_RUNNING &&
|
if (unwinder->skip_non_matching_threads) {
|
||||||
(unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_GIL)) {
|
if (unwinder->mode == PROFILING_MODE_CPU) {
|
||||||
should_skip = 1;
|
// Skip if not on CPU
|
||||||
|
should_skip = !(status_flags & THREAD_STATUS_ON_CPU);
|
||||||
|
} else if (unwinder->mode == PROFILING_MODE_GIL) {
|
||||||
|
// Skip if doesn't have GIL
|
||||||
|
should_skip = !(status_flags & THREAD_STATUS_HAS_GIL);
|
||||||
|
}
|
||||||
|
// PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip
|
||||||
}
|
}
|
||||||
|
|
||||||
if (should_skip) {
|
if (should_skip) {
|
||||||
|
|
@ -2719,16 +2766,25 @@ unwind_stack_for_thread(
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *py_status = PyLong_FromLong(status);
|
// Always use status_flags
|
||||||
|
PyObject *py_status = PyLong_FromLong(status_flags);
|
||||||
if (py_status == NULL) {
|
if (py_status == NULL) {
|
||||||
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status");
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status");
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
PyErr_Print();
|
|
||||||
|
|
||||||
|
PyObject *py_gc_collecting = PyBool_FromLong(gc_collecting);
|
||||||
|
if (py_gc_collecting == NULL) {
|
||||||
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create gc_collecting");
|
||||||
|
Py_DECREF(py_status);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
// py_status contains status flags (bitfield)
|
||||||
PyStructSequence_SetItem(result, 0, thread_id);
|
PyStructSequence_SetItem(result, 0, thread_id);
|
||||||
PyStructSequence_SetItem(result, 1, py_status); // Steals reference
|
PyStructSequence_SetItem(result, 1, py_status); // Steals reference
|
||||||
PyStructSequence_SetItem(result, 2, frame_info); // Steals reference
|
PyStructSequence_SetItem(result, 2, frame_info); // Steals reference
|
||||||
|
PyStructSequence_SetItem(result, 3, py_gc_collecting); // Steals reference
|
||||||
|
|
||||||
cleanup_stack_chunks(&chunks);
|
cleanup_stack_chunks(&chunks);
|
||||||
return result;
|
return result;
|
||||||
|
|
@ -3401,6 +3457,21 @@ _remote_debugging_exec(PyObject *m)
|
||||||
if (rc < 0) {
|
if (rc < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add thread status flag constants
|
||||||
|
if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (RemoteDebugging_InitState(st) < 0) {
|
if (RemoteDebugging_InitState(st) < 0) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -207,6 +207,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
|
||||||
_Py_atomic_store_int_relaxed(&gil->locked, 0);
|
_Py_atomic_store_int_relaxed(&gil->locked, 0);
|
||||||
if (tstate != NULL) {
|
if (tstate != NULL) {
|
||||||
tstate->holds_gil = 0;
|
tstate->holds_gil = 0;
|
||||||
|
tstate->gil_requested = 0;
|
||||||
}
|
}
|
||||||
COND_SIGNAL(gil->cond);
|
COND_SIGNAL(gil->cond);
|
||||||
MUTEX_UNLOCK(gil->mutex);
|
MUTEX_UNLOCK(gil->mutex);
|
||||||
|
|
@ -320,6 +321,8 @@ take_gil(PyThreadState *tstate)
|
||||||
|
|
||||||
MUTEX_LOCK(gil->mutex);
|
MUTEX_LOCK(gil->mutex);
|
||||||
|
|
||||||
|
tstate->gil_requested = 1;
|
||||||
|
|
||||||
int drop_requested = 0;
|
int drop_requested = 0;
|
||||||
while (_Py_atomic_load_int_relaxed(&gil->locked)) {
|
while (_Py_atomic_load_int_relaxed(&gil->locked)) {
|
||||||
unsigned long saved_switchnum = gil->switch_number;
|
unsigned long saved_switchnum = gil->switch_number;
|
||||||
|
|
@ -407,6 +410,7 @@ take_gil(PyThreadState *tstate)
|
||||||
}
|
}
|
||||||
assert(_PyThreadState_CheckConsistency(tstate));
|
assert(_PyThreadState_CheckConsistency(tstate));
|
||||||
|
|
||||||
|
tstate->gil_requested = 0;
|
||||||
tstate->holds_gil = 1;
|
tstate->holds_gil = 1;
|
||||||
_Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
|
_Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
|
||||||
update_eval_breaker_for_thread(interp, tstate);
|
update_eval_breaker_for_thread(interp, tstate);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue