Add opcode utilities and --opcodes CLI flag

New opcode_utils.py maps opcode numbers to names and detects specialized
variants using opcode module metadata. Adds normalize_location() and
extract_lineno() helpers to collector base for uniform location handling.

CLI gains --opcodes flag, validated against compatible formats (gecko,
flamegraph, heatmap, live).
This commit is contained in:
Pablo Galindo Salgado 2025-12-03 03:43:10 +00:00
parent dd27e5e679
commit 70f2ae025f
5 changed files with 161 additions and 7 deletions

View file

@ -195,6 +195,12 @@ def _add_sampling_options(parser):
dest="gc", dest="gc",
help='Don\'t include artificial "<GC>" frames to denote active garbage collection', help='Don\'t include artificial "<GC>" frames to denote active garbage collection',
) )
sampling_group.add_argument(
"--opcodes",
action="store_true",
help="Gather bytecode opcode information for instruction-level profiling "
"(shows which bytecode instructions are executing, including specializations).",
)
def _add_mode_options(parser): def _add_mode_options(parser):
@ -304,13 +310,15 @@ def _sort_to_mode(sort_choice):
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
def _create_collector(format_type, interval, skip_idle): def _create_collector(format_type, interval, skip_idle, opcodes=False):
"""Create the appropriate collector based on format type. """Create the appropriate collector based on format type.
Args: Args:
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko') format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
interval: Sampling interval in microseconds interval: Sampling interval in microseconds
skip_idle: Whether to skip idle samples skip_idle: Whether to skip idle samples
opcodes: Whether to collect opcode information (only used by gecko format
for creating interval markers in Firefox Profiler)
Returns: Returns:
A collector instance of the appropriate type A collector instance of the appropriate type
@ -320,8 +328,10 @@ def _create_collector(format_type, interval, skip_idle):
raise ValueError(f"Unknown format: {format_type}") raise ValueError(f"Unknown format: {format_type}")
# Gecko format never skips idle (it needs both GIL and CPU data) # Gecko format never skips idle (it needs both GIL and CPU data)
# and is the only format that uses opcodes for interval markers
if format_type == "gecko": if format_type == "gecko":
skip_idle = False skip_idle = False
return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes)
return collector_class(interval, skip_idle=skip_idle) return collector_class(interval, skip_idle=skip_idle)
@ -413,6 +423,13 @@ def _validate_args(args, parser):
"Gecko format automatically includes both GIL-holding and CPU status analysis." "Gecko format automatically includes both GIL-holding and CPU status analysis."
) )
# Validate --opcodes is only used with compatible formats
opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap")
if args.opcodes and args.format not in opcodes_compatible_formats:
parser.error(
f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}."
)
# Validate pstats-specific options are only used with pstats format # Validate pstats-specific options are only used with pstats format
if args.format != "pstats": if args.format != "pstats":
issues = [] issues = []
@ -560,7 +577,7 @@ def _handle_attach(args):
) )
# Create the appropriate collector # Create the appropriate collector
collector = _create_collector(args.format, args.interval, skip_idle) collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
# Sample the process # Sample the process
collector = sample( collector = sample(
@ -572,6 +589,7 @@ def _handle_attach(args):
mode=mode, mode=mode,
native=args.native, native=args.native,
gc=args.gc, gc=args.gc,
opcodes=args.opcodes,
) )
# Handle output # Handle output
@ -607,7 +625,7 @@ def _handle_run(args):
) )
# Create the appropriate collector # Create the appropriate collector
collector = _create_collector(args.format, args.interval, skip_idle) collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
# Profile the subprocess # Profile the subprocess
try: try:
@ -620,6 +638,7 @@ def _handle_run(args):
mode=mode, mode=mode,
native=args.native, native=args.native,
gc=args.gc, gc=args.gc,
opcodes=args.opcodes,
) )
# Handle output # Handle output
@ -650,6 +669,7 @@ def _handle_live_attach(args, pid):
limit=20, # Default limit limit=20, # Default limit
pid=pid, pid=pid,
mode=mode, mode=mode,
opcodes=args.opcodes,
) )
# Sample in live mode # Sample in live mode
@ -662,6 +682,7 @@ def _handle_live_attach(args, pid):
mode=mode, mode=mode,
native=args.native, native=args.native,
gc=args.gc, gc=args.gc,
opcodes=args.opcodes,
) )
@ -689,6 +710,7 @@ def _handle_live_run(args):
limit=20, # Default limit limit=20, # Default limit
pid=process.pid, pid=process.pid,
mode=mode, mode=mode,
opcodes=args.opcodes,
) )
# Profile the subprocess in live mode # Profile the subprocess in live mode
@ -702,6 +724,7 @@ def _handle_live_run(args):
mode=mode, mode=mode,
native=args.native, native=args.native,
gc=args.gc, gc=args.gc,
opcodes=args.opcodes,
) )
finally: finally:
# Clean up the subprocess # Clean up the subprocess

View file

@ -1,11 +1,40 @@
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from .constants import ( from .constants import (
DEFAULT_LOCATION,
THREAD_STATUS_HAS_GIL, THREAD_STATUS_HAS_GIL,
THREAD_STATUS_ON_CPU, THREAD_STATUS_ON_CPU,
THREAD_STATUS_UNKNOWN, THREAD_STATUS_UNKNOWN,
THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_GIL_REQUESTED,
) )
def normalize_location(location):
    """Return a usable location tuple, substituting the default for ``None``.

    Args:
        location: Either ``None`` or a location tuple, expected to be
            ``(lineno, end_lineno, col_offset, end_col_offset)``.
            The value is not validated or copied.

    Returns:
        ``DEFAULT_LOCATION`` when *location* is ``None``; otherwise the
        very same *location* object that was passed in.
    """
    # Only None is replaced; any other value is passed through untouched.
    return DEFAULT_LOCATION if location is None else location
def extract_lineno(location):
    """Return the line number stored in a location, or 0 when there is none.

    Args:
        location: Either ``None`` or an indexable location, expected to be
            ``(lineno, end_lineno, col_offset, end_col_offset)``.

    Returns:
        The first element of *location*, or ``0`` when *location* is
        ``None`` (the value used for frames without a source location).
    """
    # The line number is the first field of the location tuple.
    return 0 if location is None else location[0]
class Collector(ABC): class Collector(ABC):
@abstractmethod @abstractmethod
def collect(self, stack_frames): def collect(self, stack_frames):

View file

@ -14,6 +14,10 @@
SORT_MODE_CUMUL_PCT = 4 SORT_MODE_CUMUL_PCT = 4
SORT_MODE_NSAMPLES_CUMUL = 5 SORT_MODE_NSAMPLES_CUMUL = 5
# Default location for synthetic frames (native, GC) that have no source location.
# Format: (lineno, end_lineno, col_offset, end_col_offset)
# The collector helper normalize_location() returns this tuple when a frame's
# location is None.
DEFAULT_LOCATION = (0, 0, -1, -1)
# Thread status flags # Thread status flags
try: try:
from _remote_debugging import ( from _remote_debugging import (

View file

@ -0,0 +1,92 @@
"""Opcode utilities for bytecode-level profiler visualization.
This module provides utilities to get opcode names and detect specialization
status using the opcode module's metadata. Used by heatmap and flamegraph
collectors to display which bytecode instructions are executing at each
source line, including Python's adaptive specialization optimizations.
"""
import opcode
def _build_opcode_names():
    """Return a dict mapping opcode number -> opcode name.

    Starts from ``opcode.opname`` (index == opcode number) and, when the
    interpreter exposes the private ``opcode._specialized_opmap`` table,
    overlays the names of the specialized instruction variants.
    """
    names = dict(enumerate(opcode.opname))
    # _specialized_opmap is a private attribute and may be absent on some
    # interpreter versions; fall back to "no specialized names".
    for name, op in getattr(opcode, '_specialized_opmap', {}).items():
        names[op] = name
    return names


def _build_deopt_map():
    """Return a dict mapping specialized opcode number -> base opcode number.

    Adaptive specialization replaces generic opcodes such as LOAD_ATTR at
    runtime with specialized variants such as LOAD_ATTR_INSTANCE_VALUE.
    This mapping lets callers show both forms.  The result is empty when
    either private metadata table is missing from the ``opcode`` module.
    """
    specializations = getattr(opcode, '_specializations', None)
    specialized_opmap = getattr(opcode, '_specialized_opmap', None)
    if specializations is None or specialized_opmap is None:
        return {}

    deopt = {}
    for base_name, variant_names in specializations.items():
        base_opcode = opcode.opmap.get(base_name)
        if base_opcode is None:
            # Base instruction unknown to opmap: nothing to map to.
            continue
        for variant_name in variant_names:
            variant_opcode = specialized_opmap.get(variant_name)
            if variant_opcode is not None:
                deopt[variant_opcode] = base_opcode
    return deopt


# Opcode name mapping: opcode number -> opcode name.
# Includes both standard opcodes and specialized variants (when available).
# Built inside helper functions so that loop variables do not end up as
# public module-level globals.
_OPCODE_NAMES = _build_opcode_names()

# Deopt mapping: specialized opcode number -> base opcode number.
_DEOPT_MAP = _build_deopt_map()
def get_opcode_info(opcode_num):
    """Describe an opcode number: its name, its base name, and whether it is specialized.

    Args:
        opcode_num: The opcode number to look up.

    Returns:
        A dict with keys:
            - 'opname': The opcode name (e.g., 'LOAD_ATTR_INSTANCE_VALUE'),
              or '<N>' when the number is not in the name table
            - 'base_opname': The name of the base opcode (e.g., 'LOAD_ATTR');
              equal to 'opname' for non-specialized instructions
            - 'is_specialized': True if the number has an entry in the
              deopt table, i.e. it is a specialized instruction
    """
    opname = _OPCODE_NAMES.get(opcode_num)

    # Unknown number: report a placeholder for both names.
    if opname is None:
        placeholder = f'<{opcode_num}>'
        return {
            'opname': placeholder,
            'base_opname': placeholder,
            'is_specialized': False,
        }

    # A deopt entry means this is a specialized variant of another opcode.
    base_opcode = _DEOPT_MAP.get(opcode_num)
    specialized = base_opcode is not None
    if specialized:
        base_opname = _OPCODE_NAMES.get(base_opcode, f'<{base_opcode}>')
    else:
        base_opname = opname

    return {
        'opname': opname,
        'base_opname': base_opname,
        'is_specialized': specialized,
    }
def format_opcode(opcode_num):
    """Render an opcode number as a display string.

    Args:
        opcode_num: The opcode number to format.

    Returns:
        The opcode name alone (e.g. 'LOAD_ATTR') for a non-specialized
        instruction, or the name followed by the base opcode in
        parentheses (e.g. 'LOAD_ATTR_INSTANCE_VALUE (LOAD_ATTR)') for a
        specialized one.
    """
    details = get_opcode_info(opcode_num)
    opname = details['opname']

    if not details['is_specialized']:
        return opname

    base_opname = details['base_opname']
    return f"{opname} ({base_opname})"
def get_opcode_mapping():
    """Get opcode name and deopt mappings for JavaScript consumption.

    Returns:
        A dict with keys:
            - 'names': Dict mapping opcode numbers to opcode names
            - 'deopt': Dict mapping specialized opcode numbers to base opcode numbers

        Both inner dicts are shallow copies of the module-level tables, so
        a caller may modify or re-key them without affecting later lookups
        done by get_opcode_info() / format_opcode().
    """
    # Copy rather than expose the shared module-level tables directly.
    return {"names": dict(_OPCODE_NAMES), "deopt": dict(_DEOPT_MAP)}

View file

@ -27,7 +27,7 @@
class SampleProfiler: class SampleProfiler:
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True): def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, opcodes=False, skip_non_matching_threads=True):
self.pid = pid self.pid = pid
self.sample_interval_usec = sample_interval_usec self.sample_interval_usec = sample_interval_usec
self.all_threads = all_threads self.all_threads = all_threads
@ -35,13 +35,13 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD
if _FREE_THREADED_BUILD: if _FREE_THREADED_BUILD:
self.unwinder = _remote_debugging.RemoteUnwinder( self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc, self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads
) )
else: else:
only_active_threads = bool(self.all_threads) only_active_threads = bool(self.all_threads)
self.unwinder = _remote_debugging.RemoteUnwinder( self.unwinder = _remote_debugging.RemoteUnwinder(
self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc, self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
skip_non_matching_threads=skip_non_matching_threads opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads
) )
# Track sample intervals and total sample count # Track sample intervals and total sample count
self.sample_intervals = deque(maxlen=100) self.sample_intervals = deque(maxlen=100)
@ -193,6 +193,7 @@ def sample(
mode=PROFILING_MODE_WALL, mode=PROFILING_MODE_WALL,
native=False, native=False,
gc=True, gc=True,
opcodes=False,
): ):
"""Sample a process using the provided collector. """Sample a process using the provided collector.
@ -206,6 +207,7 @@ def sample(
GIL (only when holding GIL), ALL (includes GIL and CPU status) GIL (only when holding GIL), ALL (includes GIL and CPU status)
native: Whether to include native frames native: Whether to include native frames
gc: Whether to include GC frames gc: Whether to include GC frames
opcodes: Whether to include opcode information
Returns: Returns:
The collector with collected samples The collector with collected samples
@ -228,6 +230,7 @@ def sample(
mode=mode, mode=mode,
native=native, native=native,
gc=gc, gc=gc,
opcodes=opcodes,
skip_non_matching_threads=skip_non_matching_threads skip_non_matching_threads=skip_non_matching_threads
) )
profiler.realtime_stats = realtime_stats profiler.realtime_stats = realtime_stats
@ -248,6 +251,7 @@ def sample_live(
mode=PROFILING_MODE_WALL, mode=PROFILING_MODE_WALL,
native=False, native=False,
gc=True, gc=True,
opcodes=False,
): ):
"""Sample a process in live/interactive mode with curses TUI. """Sample a process in live/interactive mode with curses TUI.
@ -261,6 +265,7 @@ def sample_live(
GIL (only when holding GIL), ALL (includes GIL and CPU status) GIL (only when holding GIL), ALL (includes GIL and CPU status)
native: Whether to include native frames native: Whether to include native frames
gc: Whether to include GC frames gc: Whether to include GC frames
opcodes: Whether to include opcode information
Returns: Returns:
The collector with collected samples The collector with collected samples
@ -283,6 +288,7 @@ def sample_live(
mode=mode, mode=mode,
native=native, native=native,
gc=gc, gc=gc,
opcodes=opcodes,
skip_non_matching_threads=skip_non_matching_threads skip_non_matching_threads=skip_non_matching_threads
) )
profiler.realtime_stats = realtime_stats profiler.realtime_stats = realtime_stats