gh-135953: Add GIL contention markers to sampling profiler Gecko format (#139485)

This commit enhances the Gecko format reporter in the sampling profiler
to include markers for GIL acquisition events.
This commit is contained in:
Pablo Galindo Salgado 2025-11-17 12:46:26 +00:00 committed by GitHub
parent 994ab5c922
commit 89a914c58d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 627 additions and 82 deletions

View file

@ -113,6 +113,9 @@ struct _ts {
/* Currently holds the GIL. Must be its own field to avoid data races */
int holds_gil;
/* Currently requesting the GIL */
int gil_requested;
int _whence;
/* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).

View file

@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets {
uint64_t native_thread_id;
uint64_t datastack_chunk;
uint64_t status;
uint64_t holds_gil;
uint64_t gil_requested;
} thread_state;
// InterpreterFrame offset;
@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets {
.native_thread_id = offsetof(PyThreadState, native_thread_id), \
.datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
.status = offsetof(PyThreadState, _status), \
.holds_gil = offsetof(PyThreadState, holds_gil), \
.gil_requested = offsetof(PyThreadState, gil_requested), \
}, \
.interpreter_frame = { \
.size = sizeof(_PyInterpreterFrame), \

View file

@ -1,17 +1,14 @@
from abc import ABC, abstractmethod
# Enums are slow
THREAD_STATE_RUNNING = 0
THREAD_STATE_IDLE = 1
THREAD_STATE_GIL_WAIT = 2
THREAD_STATE_UNKNOWN = 3
STATUS = {
THREAD_STATE_RUNNING: "running",
THREAD_STATE_IDLE: "idle",
THREAD_STATE_GIL_WAIT: "gil_wait",
THREAD_STATE_UNKNOWN: "unknown",
}
# Thread status flags
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
except ImportError:
# Fallback for tests or when module is not available
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
class Collector(ABC):
@abstractmethod
@ -26,8 +23,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
"""Iterate over all frame stacks from all interpreters and threads."""
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
continue
# skip_idle now means: skip if thread is not actively running
# A thread is "active" if it has the GIL OR is on CPU
if skip_idle:
status_flags = thread_info.status
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
if not (has_gil or on_cpu):
continue
frames = thread_info.frame_info
if frames:
yield frames, thread_info.thread_id

View file

@ -1,9 +1,20 @@
import itertools
import json
import os
import platform
import sys
import threading
import time
from .collector import Collector, THREAD_STATE_RUNNING
from .collector import Collector
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
except ImportError:
# Fallback if module not available (shouldn't happen in normal use)
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
# Categories matching Firefox Profiler expectations
@ -11,14 +22,20 @@
{"name": "Other", "color": "grey", "subcategories": ["Other"]},
{"name": "Python", "color": "yellow", "subcategories": ["Other"]},
{"name": "Native", "color": "blue", "subcategories": ["Other"]},
{"name": "Idle", "color": "transparent", "subcategories": ["Other"]},
{"name": "GC", "color": "orange", "subcategories": ["Other"]},
{"name": "GIL", "color": "green", "subcategories": ["Other"]},
{"name": "CPU", "color": "purple", "subcategories": ["Other"]},
{"name": "Code Type", "color": "red", "subcategories": ["Other"]},
]
# Category indices
CATEGORY_OTHER = 0
CATEGORY_PYTHON = 1
CATEGORY_NATIVE = 2
CATEGORY_IDLE = 3
CATEGORY_GC = 3
CATEGORY_GIL = 4
CATEGORY_CPU = 5
CATEGORY_CODE_TYPE = 6
# Subcategory indices
DEFAULT_SUBCATEGORY = 0
@ -58,6 +75,56 @@ def __init__(self, *, skip_idle=False):
self.last_sample_time = 0
self.interval = 1.0 # Will be calculated from actual sampling
# State tracking for interval markers (tid -> start_time)
self.has_gil_start = {} # Thread has the GIL
self.no_gil_start = {} # Thread doesn't have the GIL
self.on_cpu_start = {} # Thread is running on CPU
self.off_cpu_start = {} # Thread is off CPU
self.python_code_start = {} # Thread running Python code (has GIL)
self.native_code_start = {} # Thread running native code (on CPU without GIL)
self.gil_wait_start = {} # Thread waiting for GIL
# GC event tracking: track GC start time per thread
self.gc_start_per_thread = {} # tid -> start_time
# Track which threads have been initialized for state tracking
self.initialized_threads = set()
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
active_name, inactive_name, category, current_time):
"""Track binary state transitions and emit markers.
Args:
tid: Thread ID
condition: Whether the active state is true
active_dict: Dict tracking start time of active state
inactive_dict: Dict tracking start time of inactive state
active_name: Name for active state marker
inactive_name: Name for inactive state marker
category: Gecko category for the markers
current_time: Current timestamp
"""
# On first observation of a thread, just record the current state
# without creating a marker (we don't know what the previous state was)
if tid not in self.initialized_threads:
if condition:
active_dict[tid] = current_time
else:
inactive_dict[tid] = current_time
return
# For already-initialized threads, track transitions
if condition:
active_dict.setdefault(tid, current_time)
if tid in inactive_dict:
self._add_marker(tid, inactive_name, inactive_dict.pop(tid),
current_time, category)
else:
inactive_dict.setdefault(tid, current_time)
if tid in active_dict:
self._add_marker(tid, active_name, active_dict.pop(tid),
current_time, category)
def collect(self, stack_frames):
"""Collect a sample from stack frames."""
current_time = (time.time() * 1000) - self.start_time
@ -69,19 +136,12 @@ def collect(self, stack_frames):
) / self.sample_count
self.last_sample_time = current_time
# Process threads and track GC per thread
for interpreter_info in stack_frames:
for thread_info in interpreter_info.threads:
if (
self.skip_idle
and thread_info.status != THREAD_STATE_RUNNING
):
continue
frames = thread_info.frame_info
if not frames:
continue
tid = thread_info.thread_id
gc_collecting = thread_info.gc_collecting
# Initialize thread if needed
if tid not in self.threads:
@ -89,6 +149,80 @@ def collect(self, stack_frames):
thread_data = self.threads[tid]
# Decode status flags
status_flags = thread_info.status
has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)
# Track GIL possession (Has GIL / No GIL)
self._track_state_transition(
tid, has_gil, self.has_gil_start, self.no_gil_start,
"Has GIL", "No GIL", CATEGORY_GIL, current_time
)
# Track CPU state (On CPU / Off CPU)
self._track_state_transition(
tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
"On CPU", "Off CPU", CATEGORY_CPU, current_time
)
# Track code type (Python Code / Native Code)
# This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither
if has_gil:
self._track_state_transition(
tid, True, self.python_code_start, self.native_code_start,
"Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
)
elif on_cpu:
self._track_state_transition(
tid, True, self.native_code_start, self.python_code_start,
"Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
)
else:
# Thread is idle (neither has GIL nor on CPU) - close any open code markers
# This handles the third state that _track_state_transition doesn't cover
if tid in self.initialized_threads:
if tid in self.python_code_start:
self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
current_time, CATEGORY_CODE_TYPE)
if tid in self.native_code_start:
self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
current_time, CATEGORY_CODE_TYPE)
# Track "Waiting for GIL" intervals (one-sided tracking)
if gil_requested:
self.gil_wait_start.setdefault(tid, current_time)
elif tid in self.gil_wait_start:
self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
current_time, CATEGORY_GIL)
# Track GC events - attribute to all threads that hold the GIL during GC
# (GC is interpreter-wide but runs on whichever thread(s) have the GIL)
# If GIL switches during GC, multiple threads will get GC markers
if gc_collecting and has_gil:
# Start GC marker if not already started for this thread
if tid not in self.gc_start_per_thread:
self.gc_start_per_thread[tid] = current_time
elif tid in self.gc_start_per_thread:
# End GC marker if it was running for this thread
# (either GC finished or thread lost GIL)
self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid),
current_time, CATEGORY_GC)
# Mark thread as initialized after processing all state transitions
self.initialized_threads.add(tid)
# Categorize: idle if neither has GIL nor on CPU
is_idle = not has_gil and not on_cpu
# Skip idle threads if skip_idle is enabled
if self.skip_idle and is_idle:
continue
if not frames:
continue
# Process the stack
stack_index = self._process_stack(thread_data, frames)
@ -102,7 +236,6 @@ def collect(self, stack_frames):
def _create_thread(self, tid):
"""Create a new thread structure with processed profile format."""
import threading
# Determine if this is the main thread
try:
@ -181,7 +314,7 @@ def _create_thread(self, tid):
"functionSize": [],
"length": 0,
},
# Markers - processed format
# Markers - processed format (arrays)
"markers": {
"data": [],
"name": [],
@ -215,6 +348,27 @@ def _intern_string(self, s):
self.global_string_map[s] = idx
return idx
def _add_marker(self, tid, name, start_time, end_time, category):
"""Add an interval marker for a specific thread."""
if tid not in self.threads:
return
thread_data = self.threads[tid]
duration = end_time - start_time
name_idx = self._intern_string(name)
markers = thread_data["markers"]
markers["name"].append(name_idx)
markers["startTime"].append(start_time)
markers["endTime"].append(end_time)
markers["phase"].append(1) # 1 = interval marker
markers["category"].append(category)
markers["data"].append({
"type": name.replace(" ", ""),
"duration": duration,
"tid": tid
})
def _process_stack(self, thread_data, frames):
"""Process a stack and return the stack index."""
if not frames:
@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
frame_cache[frame_key] = frame_idx
return frame_idx
def _finalize_markers(self):
"""Close any open markers at the end of profiling."""
end_time = self.last_sample_time
# Close all open markers for each thread using a generic approach
marker_states = [
(self.has_gil_start, "Has GIL", CATEGORY_GIL),
(self.no_gil_start, "No GIL", CATEGORY_GIL),
(self.on_cpu_start, "On CPU", CATEGORY_CPU),
(self.off_cpu_start, "Off CPU", CATEGORY_CPU),
(self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
(self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
(self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
(self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
]
for state_dict, marker_name, category in marker_states:
for tid in list(state_dict.keys()):
self._add_marker(tid, marker_name, state_dict[tid], end_time, category)
del state_dict[tid]
def export(self, filename):
"""Export the profile to a Gecko JSON file."""
if self.sample_count > 0 and self.last_sample_time > 0:
self.interval = self.last_sample_time / self.sample_count
profile = self._build_profile()
# Spinner for progress indication
spinner = itertools.cycle(['', '', '', '', '', '', '', '', '', ''])
stop_spinner = threading.Event()
with open(filename, "w") as f:
json.dump(profile, f, separators=(",", ":"))
def spin():
message = 'Building Gecko profile...'
while not stop_spinner.is_set():
sys.stderr.write(f'\r{next(spinner)} {message}')
sys.stderr.flush()
time.sleep(0.1)
# Clear the spinner line
sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
sys.stderr.flush()
spinner_thread = threading.Thread(target=spin, daemon=True)
spinner_thread.start()
try:
# Finalize any open markers before building profile
self._finalize_markers()
profile = self._build_profile()
with open(filename, "w") as f:
json.dump(profile, f, separators=(",", ":"))
finally:
stop_spinner.set()
spinner_thread.join(timeout=1.0)
# Small delay to ensure the clear happens
time.sleep(0.01)
print(f"Gecko profile written to {filename}")
print(
@ -416,6 +618,7 @@ def _build_profile(self):
frame_table["length"] = len(frame_table["func"])
func_table["length"] = len(func_table["name"])
resource_table["length"] = len(resource_table["name"])
thread_data["markers"]["length"] = len(thread_data["markers"]["name"])
# Clean up internal caches
del thread_data["_stackCache"]

View file

@ -21,6 +21,7 @@
PROFILING_MODE_WALL = 0
PROFILING_MODE_CPU = 1
PROFILING_MODE_GIL = 2
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
def _parse_mode(mode_string):
@ -136,18 +137,20 @@ def _run_with_sync(original_cmd):
class SampleProfiler:
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL):
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
self.pid = pid
self.sample_interval_usec = sample_interval_usec
self.all_threads = all_threads
if _FREE_THREADED_BUILD:
self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, all_threads=self.all_threads, mode=mode
self.pid, all_threads=self.all_threads, mode=mode,
skip_non_matching_threads=skip_non_matching_threads
)
else:
only_active_threads = bool(self.all_threads)
self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, only_active_thread=only_active_threads, mode=mode
self.pid, only_active_thread=only_active_threads, mode=mode,
skip_non_matching_threads=skip_non_matching_threads
)
# Track sample intervals and total sample count
self.sample_intervals = deque(maxlen=100)
@ -614,14 +617,21 @@ def sample(
realtime_stats=False,
mode=PROFILING_MODE_WALL,
):
# PROFILING_MODE_ALL implies no skipping at all
if mode == PROFILING_MODE_ALL:
skip_non_matching_threads = False
skip_idle = False
else:
# Determine skip settings based on output format and mode
skip_non_matching_threads = output_format != "gecko"
skip_idle = mode != PROFILING_MODE_WALL
profiler = SampleProfiler(
pid, sample_interval_usec, all_threads=all_threads, mode=mode
pid, sample_interval_usec, all_threads=all_threads, mode=mode,
skip_non_matching_threads=skip_non_matching_threads
)
profiler.realtime_stats = realtime_stats
# Determine skip_idle for collector compatibility
skip_idle = mode != PROFILING_MODE_WALL
collector = None
match output_format:
case "pstats":
@ -633,7 +643,8 @@ def sample(
collector = FlamegraphCollector(skip_idle=skip_idle)
filename = filename or f"flamegraph.{pid}.html"
case "gecko":
collector = GeckoCollector(skip_idle=skip_idle)
# Gecko format never skips idle threads to show full thread states
collector = GeckoCollector(skip_idle=False)
filename = filename or f"gecko.{pid}.json"
case _:
raise ValueError(f"Invalid output format: {output_format}")
@ -882,6 +893,10 @@ def main():
if args.format in ("collapsed", "gecko"):
_validate_collapsed_format_args(args, parser)
# Validate that --mode is not used with --gecko
if args.format == "gecko" and args.mode != "wall":
parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).")
sort_value = args.sort if args.sort is not None else 2
if args.module is not None and not args.module:
@ -900,7 +915,11 @@ def main():
elif target_count > 1:
parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
mode = _parse_mode(args.mode)
# Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice
if args.format == "gecko":
mode = PROFILING_MODE_ALL
else:
mode = _parse_mode(args.mode)
if args.pid:
sample(

View file

@ -23,6 +23,12 @@
PROFILING_MODE_WALL = 0
PROFILING_MODE_CPU = 1
PROFILING_MODE_GIL = 2
PROFILING_MODE_ALL = 3
# Thread status flags
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_UNKNOWN = (1 << 2)
try:
from concurrent import interpreters
@ -1763,11 +1769,14 @@ def busy():
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
# Check if sleeper thread is idle and busy thread is running
# Check if sleeper thread is off CPU and busy thread is on CPU
# In the new flags system:
# - sleeper should NOT have ON_CPU flag (off CPU)
# - busy should have ON_CPU flag
if (sleeper_tid in statuses and
busy_tid in statuses and
statuses[sleeper_tid] == 1 and
statuses[busy_tid] == 0):
not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and
(statuses[busy_tid] & THREAD_STATUS_ON_CPU)):
break
time.sleep(0.5) # Give a bit of time to let threads settle
except PermissionError:
@ -1779,8 +1788,8 @@ def busy():
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)")
self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper thread should be off CPU")
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy thread should be on CPU")
finally:
if client_socket is not None:
@ -1875,11 +1884,14 @@ def busy():
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
# Check if sleeper thread is idle (status 2 for GIL mode) and busy thread is running
# Check if sleeper thread doesn't have GIL and busy thread has GIL
# In the new flags system:
# - sleeper should NOT have HAS_GIL flag (waiting for GIL)
# - busy should have HAS_GIL flag
if (sleeper_tid in statuses and
busy_tid in statuses and
statuses[sleeper_tid] == 2 and
statuses[busy_tid] == 0):
not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and
(statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
break
time.sleep(0.5) # Give a bit of time to let threads settle
except PermissionError:
@ -1891,8 +1903,8 @@ def busy():
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
self.assertEqual(statuses[sleeper_tid], 2, "Sleeper thread should be idle (1)")
self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL")
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy thread should have GIL")
finally:
if client_socket is not None:
@ -1900,6 +1912,128 @@ def busy():
p.terminate()
p.wait(timeout=SHORT_TIMEOUT)
@unittest.skipIf(
sys.platform not in ("linux", "darwin", "win32"),
"Test only runs on supported platforms (Linux, macOS, or Windows)",
)
@unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception")
def test_thread_status_all_mode_detection(self):
port = find_unused_port()
script = textwrap.dedent(
f"""\
import socket
import threading
import time
import sys
def sleeper_thread():
conn = socket.create_connection(("localhost", {port}))
conn.sendall(b"sleeper:" + str(threading.get_native_id()).encode())
while True:
time.sleep(1)
def busy_thread():
conn = socket.create_connection(("localhost", {port}))
conn.sendall(b"busy:" + str(threading.get_native_id()).encode())
while True:
sum(range(100000))
t1 = threading.Thread(target=sleeper_thread)
t2 = threading.Thread(target=busy_thread)
t1.start()
t2.start()
t1.join()
t2.join()
"""
)
with os_helper.temp_dir() as tmp_dir:
script_file = make_script(tmp_dir, "script", script)
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
server_socket.bind(("localhost", port))
server_socket.listen(2)
server_socket.settimeout(SHORT_TIMEOUT)
p = subprocess.Popen(
[sys.executable, script_file],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
client_sockets = []
try:
sleeper_tid = None
busy_tid = None
# Receive thread IDs from the child process
for _ in range(2):
client_socket, _ = server_socket.accept()
client_sockets.append(client_socket)
line = client_socket.recv(1024)
if line:
if line.startswith(b"sleeper:"):
try:
sleeper_tid = int(line.split(b":")[-1])
except Exception:
pass
elif line.startswith(b"busy:"):
try:
busy_tid = int(line.split(b":")[-1])
except Exception:
pass
server_socket.close()
attempts = 10
statuses = {}
try:
unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_ALL,
skip_non_matching_threads=False)
for _ in range(attempts):
traces = unwinder.get_stack_trace()
# Find threads and their statuses
statuses = {}
for interpreter_info in traces:
for thread_info in interpreter_info.threads:
statuses[thread_info.thread_id] = thread_info.status
# Check ALL mode provides both GIL and CPU info
# - sleeper should NOT have ON_CPU and NOT have HAS_GIL
# - busy should have ON_CPU and have HAS_GIL
if (sleeper_tid in statuses and
busy_tid in statuses and
not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and
not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and
(statuses[busy_tid] & THREAD_STATUS_ON_CPU) and
(statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
break
time.sleep(0.5)
except PermissionError:
self.skipTest(
"Insufficient permissions to read the stack trace"
)
self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
self.assertIsNotNone(busy_tid, "Busy thread id not received")
self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
# Sleeper thread: off CPU, no GIL
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper should be off CPU")
self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL")
# Busy thread: on CPU, has GIL
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy should be on CPU")
self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy should have GIL")
finally:
for client_socket in client_sockets:
client_socket.close()
p.terminate()
p.wait(timeout=SHORT_TIMEOUT)
p.stdout.close()
p.stderr.close()
if __name__ == "__main__":

View file

@ -63,12 +63,14 @@ def __repr__(self):
class MockThreadInfo:
"""Mock ThreadInfo for testing since the real one isn't accessible."""
def __init__(self, thread_id, frame_info):
def __init__(self, thread_id, frame_info, status=0, gc_collecting=False): # Default to THREAD_STATE_RUNNING (0)
self.thread_id = thread_id
self.frame_info = frame_info
self.status = status
self.gc_collecting = gc_collecting
def __repr__(self):
return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info})"
return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info}, status={self.status}, gc_collecting={self.gc_collecting})"
class MockInterpreterInfo:
@ -674,6 +676,97 @@ def test_gecko_collector_export(self):
self.assertIn("func2", string_array)
self.assertIn("other_func", string_array)
def test_gecko_collector_markers(self):
"""Test Gecko profile markers for GIL and CPU state tracking."""
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_GIL_REQUESTED
except ImportError:
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
THREAD_STATUS_GIL_REQUESTED = (1 << 3)
collector = GeckoCollector()
# Status combinations for different thread states
HAS_GIL_ON_CPU = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Running Python code
NO_GIL_ON_CPU = THREAD_STATUS_ON_CPU # Running native code
WAITING_FOR_GIL = THREAD_STATUS_GIL_REQUESTED # Waiting for GIL
# Simulate thread state transitions
collector.collect([
MockInterpreterInfo(0, [
MockThreadInfo(1, [("test.py", 10, "python_func")], status=HAS_GIL_ON_CPU)
])
])
collector.collect([
MockInterpreterInfo(0, [
MockThreadInfo(1, [("test.py", 15, "wait_func")], status=WAITING_FOR_GIL)
])
])
collector.collect([
MockInterpreterInfo(0, [
MockThreadInfo(1, [("test.py", 20, "python_func2")], status=HAS_GIL_ON_CPU)
])
])
collector.collect([
MockInterpreterInfo(0, [
MockThreadInfo(1, [("native.c", 100, "native_func")], status=NO_GIL_ON_CPU)
])
])
profile_data = collector._build_profile()
# Verify we have threads with markers
self.assertIn("threads", profile_data)
self.assertEqual(len(profile_data["threads"]), 1)
thread_data = profile_data["threads"][0]
# Check markers exist
self.assertIn("markers", thread_data)
markers = thread_data["markers"]
# Should have marker arrays
self.assertIn("name", markers)
self.assertIn("startTime", markers)
self.assertIn("endTime", markers)
self.assertIn("category", markers)
self.assertGreater(markers["length"], 0, "Should have generated markers")
# Get marker names from string table
string_array = profile_data["shared"]["stringArray"]
marker_names = [string_array[idx] for idx in markers["name"]]
# Verify we have different marker types
marker_name_set = set(marker_names)
# Should have "Has GIL" markers (when thread had GIL)
self.assertIn("Has GIL", marker_name_set, "Should have 'Has GIL' markers")
# Should have "No GIL" markers (when thread didn't have GIL)
self.assertIn("No GIL", marker_name_set, "Should have 'No GIL' markers")
# Should have "On CPU" markers (when thread was on CPU)
self.assertIn("On CPU", marker_name_set, "Should have 'On CPU' markers")
# Should have "Waiting for GIL" markers (when thread was waiting)
self.assertIn("Waiting for GIL", marker_name_set, "Should have 'Waiting for GIL' markers")
# Verify marker structure
for i in range(markers["length"]):
# All markers should be interval markers (phase = 1)
self.assertEqual(markers["phase"][i], 1, f"Marker {i} should be interval marker")
# All markers should have valid time range
start_time = markers["startTime"][i]
end_time = markers["endTime"][i]
self.assertLessEqual(start_time, end_time, f"Marker {i} should have valid time range")
# All markers should have valid category
self.assertGreaterEqual(markers["category"][i], 0, f"Marker {i} should have valid category")
def test_pstats_collector_export(self):
collector = PstatsCollector(
sample_interval_usec=1000000
@ -2625,19 +2718,30 @@ def test_mode_validation(self):
def test_frames_filtered_with_skip_idle(self):
"""Test that frames are actually filtered when skip_idle=True."""
# Import thread status flags
try:
from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU
except ImportError:
THREAD_STATUS_HAS_GIL = (1 << 0)
THREAD_STATUS_ON_CPU = (1 << 1)
# Create mock frames with different thread statuses
class MockThreadInfoWithStatus:
def __init__(self, thread_id, frame_info, status):
self.thread_id = thread_id
self.frame_info = frame_info
self.status = status
self.gc_collecting = False
# Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
ACTIVE_STATUS = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU # Has GIL and on CPU
IDLE_STATUS = 0 # Neither has GIL nor on CPU
# Create test data: running thread, idle thread, and another running thread
test_frames = [
MockInterpreterInfo(0, [
MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], 0), # RUNNING
MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], 1), # IDLE
MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], 0), # RUNNING
MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], ACTIVE_STATUS),
MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], IDLE_STATUS),
MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], ACTIVE_STATUS),
])
]

View file

@ -11,6 +11,7 @@
* HEADERS AND INCLUDES
* ============================================================================ */
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stddef.h>
@ -81,6 +82,8 @@ typedef enum _WIN32_THREADSTATE {
#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject)
#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject)
#define SIZEOF_LONG_OBJ sizeof(PyLongObject)
#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)
// Calculate the minimum buffer size needed to read interpreter state fields
// We need to read code_object_generation and potentially tlbc_generation
@ -178,8 +181,9 @@ static PyStructSequence_Desc CoroInfo_desc = {
// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info)
static PyStructSequence_Field ThreadInfo_fields[] = {
{"thread_id", "Thread ID"},
{"status", "Thread status"},
{"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"},
{"frame_info", "Frame information"},
{"gc_collecting", "Whether GC is collecting (interpreter-level)"},
{NULL}
};
@ -187,7 +191,7 @@ static PyStructSequence_Desc ThreadInfo_desc = {
"_remote_debugging.ThreadInfo",
"Information about a thread",
ThreadInfo_fields,
2
3
};
// InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list)
@ -247,9 +251,16 @@ enum _ThreadState {
enum _ProfilingMode {
PROFILING_MODE_WALL = 0,
PROFILING_MODE_CPU = 1,
PROFILING_MODE_GIL = 2
PROFILING_MODE_GIL = 2,
PROFILING_MODE_ALL = 3 // Combines GIL + CPU checks
};
// Thread status flags (can be combined)
#define THREAD_STATUS_HAS_GIL (1 << 0) // Thread has the GIL
#define THREAD_STATUS_ON_CPU (1 << 1) // Thread is running on CPU
#define THREAD_STATUS_UNKNOWN (1 << 2) // Status could not be determined
#define THREAD_STATUS_GIL_REQUESTED (1 << 3) // Thread is waiting for the GIL
typedef struct {
PyObject_HEAD
proc_handle_t handle;
@ -2650,34 +2661,70 @@ unwind_stack_for_thread(
long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
// Calculate thread status based on mode
int status = THREAD_STATE_UNKNOWN;
if (unwinder->mode == PROFILING_MODE_CPU) {
long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
status = get_thread_status(unwinder, tid, pthread_id);
if (status == -1) {
PyErr_Print();
PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status");
goto error;
}
} else if (unwinder->mode == PROFILING_MODE_GIL) {
// Read GC collecting state from the interpreter (before any skip checks)
uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp);
// Read the GC runtime state from the interpreter state
uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc;
char gc_state[SIZEOF_GC_RUNTIME_STATE];
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state");
goto error;
}
int gc_collecting = GET_MEMBER(int, gc_state, unwinder->debug_offsets.gc.collecting);
// Calculate thread status using flags (always)
int status_flags = 0;
// Check GIL status
int has_gil = 0;
int gil_requested = 0;
#ifdef Py_GIL_DISABLED
// All threads are considered running in free threading builds if they have a thread state attached
int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
status = active ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
has_gil = active;
#else
status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
// Read holds_gil directly from thread state
has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil);
// Check if thread is actively requesting the GIL
if (unwinder->debug_offsets.thread_state.gil_requested != 0) {
gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested);
}
// Set GIL_REQUESTED flag if thread is waiting
if (!has_gil && gil_requested) {
status_flags |= THREAD_STATUS_GIL_REQUESTED;
}
#endif
} else {
// PROFILING_MODE_WALL - all threads are considered running
status = THREAD_STATE_RUNNING;
if (has_gil) {
status_flags |= THREAD_STATUS_HAS_GIL;
}
// Assert that we never have both HAS_GIL and GIL_REQUESTED set at the same time
// This would indicate a race condition in the GIL state tracking
assert(!(has_gil && gil_requested));
// Check CPU status
long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
int cpu_status = get_thread_status(unwinder, tid, pthread_id);
if (cpu_status == -1) {
status_flags |= THREAD_STATUS_UNKNOWN;
} else if (cpu_status == THREAD_STATE_RUNNING) {
status_flags |= THREAD_STATUS_ON_CPU;
}
// Check if we should skip this thread based on mode
int should_skip = 0;
if (unwinder->skip_non_matching_threads && status != THREAD_STATE_RUNNING &&
(unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_GIL)) {
should_skip = 1;
if (unwinder->skip_non_matching_threads) {
if (unwinder->mode == PROFILING_MODE_CPU) {
// Skip if not on CPU
should_skip = !(status_flags & THREAD_STATUS_ON_CPU);
} else if (unwinder->mode == PROFILING_MODE_GIL) {
// Skip if doesn't have GIL
should_skip = !(status_flags & THREAD_STATUS_HAS_GIL);
}
// PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip
}
if (should_skip) {
@ -2719,16 +2766,25 @@ unwind_stack_for_thread(
goto error;
}
PyObject *py_status = PyLong_FromLong(status);
// Always use status_flags
PyObject *py_status = PyLong_FromLong(status_flags);
if (py_status == NULL) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status");
goto error;
}
PyErr_Print();
PyObject *py_gc_collecting = PyBool_FromLong(gc_collecting);
if (py_gc_collecting == NULL) {
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create gc_collecting");
Py_DECREF(py_status);
goto error;
}
// py_status contains status flags (bitfield)
PyStructSequence_SetItem(result, 0, thread_id);
PyStructSequence_SetItem(result, 1, py_status); // Steals reference
PyStructSequence_SetItem(result, 2, frame_info); // Steals reference
PyStructSequence_SetItem(result, 3, py_gc_collecting); // Steals reference
cleanup_stack_chunks(&chunks);
return result;
@ -3401,6 +3457,21 @@ _remote_debugging_exec(PyObject *m)
if (rc < 0) {
return -1;
}
// Add thread status flag constants
if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) {
return -1;
}
if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) {
return -1;
}
if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) {
return -1;
}
if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) {
return -1;
}
if (RemoteDebugging_InitState(st) < 0) {
return -1;
}

View file

@ -207,6 +207,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
_Py_atomic_store_int_relaxed(&gil->locked, 0);
if (tstate != NULL) {
tstate->holds_gil = 0;
tstate->gil_requested = 0;
}
COND_SIGNAL(gil->cond);
MUTEX_UNLOCK(gil->mutex);
@ -320,6 +321,8 @@ take_gil(PyThreadState *tstate)
MUTEX_LOCK(gil->mutex);
tstate->gil_requested = 1;
int drop_requested = 0;
while (_Py_atomic_load_int_relaxed(&gil->locked)) {
unsigned long saved_switchnum = gil->switch_number;
@ -407,6 +410,7 @@ take_gil(PyThreadState *tstate)
}
assert(_PyThreadState_CheckConsistency(tstate));
tstate->gil_requested = 0;
tstate->holds_gil = 1;
_Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
update_eval_breaker_for_thread(interp, tstate);