Add opcode utilities and --opcodes CLI flag

New opcode_utils.py maps opcode numbers to names and detects specialized variants using opcode module metadata. Adds normalize_location() and extract_lineno() helpers to collector base for uniform location handling. CLI gains --opcodes flag, validated against compatible formats (gecko, flamegraph, heatmap, live).
2025-12-08 06:10:17 +00:00 · 2025-12-03 03:43:10 +00:00 · 2025-12-03 03:43:10 +00:00 · 70f2ae025f
commit 70f2ae025f
parent dd27e5e679
5 changed files with 161 additions and 7 deletions
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@ -195,6 +195,12 @@ def _add_sampling_options(parser):
        dest="gc",
        help='Don\'t include artificial "<GC>" frames to denote active garbage collection',
    )
    sampling_group.add_argument(
        "--opcodes",
        action="store_true",
        help="Gather bytecode opcode information for instruction-level profiling "
        "(shows which bytecode instructions are executing, including specializations).",
    )
 def _add_mode_options(parser):
@ -304,13 +310,15 @@ def _sort_to_mode(sort_choice):
    return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
-def _create_collector(format_type, interval, skip_idle):
+def _create_collector(format_type, interval, skip_idle, opcodes=False):
    """Create the appropriate collector based on format type.
    Args:
-        format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko')
+        format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
        interval: Sampling interval in microseconds
        skip_idle: Whether to skip idle samples
        opcodes: Whether to collect opcode information (only used by gecko format
                 for creating interval markers in Firefox Profiler)
    Returns:
        A collector instance of the appropriate type
@ -320,8 +328,10 @@ def _create_collector(format_type, interval, skip_idle):
        raise ValueError(f"Unknown format: {format_type}")
    # Gecko format never skips idle (it needs both GIL and CPU data)
    # and is the only format that uses opcodes for interval markers
    if format_type == "gecko":
        skip_idle = False
        return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes)
    return collector_class(interval, skip_idle=skip_idle)
@ -413,6 +423,13 @@ def _validate_args(args, parser):
            "Gecko format automatically includes both GIL-holding and CPU status analysis."
        )
    # Validate --opcodes is only used with compatible formats
    opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap")
    if args.opcodes and args.format not in opcodes_compatible_formats:
        parser.error(
            f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}."
        )
    # Validate pstats-specific options are only used with pstats format
    if args.format != "pstats":
        issues = []
@ -560,7 +577,7 @@ def _handle_attach(args):
    )
    # Create the appropriate collector
-    collector = _create_collector(args.format, args.interval, skip_idle)
+    collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
    # Sample the process
    collector = sample(
@ -572,6 +589,7 @@ def _handle_attach(args):
        mode=mode,
        native=args.native,
        gc=args.gc,
        opcodes=args.opcodes,
    )
    # Handle output
@ -607,7 +625,7 @@ def _handle_run(args):
    )
    # Create the appropriate collector
-    collector = _create_collector(args.format, args.interval, skip_idle)
+    collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
    # Profile the subprocess
    try:
@ -620,6 +638,7 @@ def _handle_run(args):
            mode=mode,
            native=args.native,
            gc=args.gc,
            opcodes=args.opcodes,
        )
        # Handle output
@ -650,6 +669,7 @@ def _handle_live_attach(args, pid):
        limit=20,  # Default limit
        pid=pid,
        mode=mode,
        opcodes=args.opcodes,
    )
    # Sample in live mode
@ -662,6 +682,7 @@ def _handle_live_attach(args, pid):
        mode=mode,
        native=args.native,
        gc=args.gc,
        opcodes=args.opcodes,
    )
@ -689,6 +710,7 @@ def _handle_live_run(args):
        limit=20,  # Default limit
        pid=process.pid,
        mode=mode,
        opcodes=args.opcodes,
    )
    # Profile the subprocess in live mode
@ -702,6 +724,7 @@ def _handle_live_run(args):
            mode=mode,
            native=args.native,
            gc=args.gc,
            opcodes=args.opcodes,
        )
    finally:
        # Clean up the subprocess
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@ -1,11 +1,40 @@
 from abc import ABC, abstractmethod
 from .constants import (
    DEFAULT_LOCATION,
    THREAD_STATUS_HAS_GIL,
    THREAD_STATUS_ON_CPU,
    THREAD_STATUS_UNKNOWN,
    THREAD_STATUS_GIL_REQUESTED,
 )
 def normalize_location(location):
    """Return a usable location tuple, falling back to the default for None.
    Args:
        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
            or None when the frame carries no source location
    Returns:
        tuple: ``location`` itself when it is not None, otherwise
        ``DEFAULT_LOCATION`` (imported from ``.constants``)
    """
    # Non-None values are passed through untouched (no copy, no validation).
    return DEFAULT_LOCATION if location is None else location
 def extract_lineno(location):
    """Return the line number held in the first slot of a location tuple.
    Args:
        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
            or None when the frame carries no source location
    Returns:
        int: ``location[0]``, or 0 when ``location`` is None
        (the value used for synthetic frames)
    """
    return 0 if location is None else location[0]
 class Collector(ABC):
    @abstractmethod
    def collect(self, stack_frames):
--- a/Lib/profiling/sampling/constants.py
+++ b/Lib/profiling/sampling/constants.py
@ -14,6 +14,10 @@
 SORT_MODE_CUMUL_PCT = 4
 SORT_MODE_NSAMPLES_CUMUL = 5
 # Default location for synthetic frames (native, GC) that have no source location
 # Format: (lineno, end_lineno, col_offset, end_col_offset)
 DEFAULT_LOCATION = (0, 0, -1, -1)
 # Thread status flags
 try:
    from _remote_debugging import (
--- a/Lib/profiling/sampling/opcode_utils.py
+++ b/Lib/profiling/sampling/opcode_utils.py
@ -0,0 +1,92 @@
 """Opcode utilities for bytecode-level profiler visualization.
 This module provides utilities to get opcode names and detect specialization
 status using the opcode module's metadata. Used by heatmap and flamegraph
 collectors to display which bytecode instructions are executing at each
 source line, including Python's adaptive specialization optimizations.
 """
 import opcode
 # Opcode number -> opcode name.
 # Seeded from opcode.opname (a sequence indexed by opcode number), then
 # overlaid with the specialized variants when the running interpreter's
 # opcode module exposes them (Python 3.11+ adaptive specialization).
 # The tables are filled with generator expressions rather than module-level
 # "for" loops so that no loop variables are left behind as public module
 # attributes (which "from ... import *" would otherwise pick up).
 _OPCODE_NAMES = dict(enumerate(opcode.opname))
 if hasattr(opcode, '_specialized_opmap'):
    _OPCODE_NAMES.update(
        (number, label)
        for label, number in opcode._specialized_opmap.items()
    )
 # Specialized opcode number -> base opcode number.
 # Generic opcodes like LOAD_ATTR can be replaced at runtime with specialized
 # variants like LOAD_ATTR_INSTANCE_VALUE; this mapping lets callers show both
 # forms. It stays empty when the opcode module lacks either private table.
 _DEOPT_MAP = {}
 if hasattr(opcode, '_specializations') and hasattr(opcode, '_specialized_opmap'):
    _DEOPT_MAP.update(
        (opcode._specialized_opmap[variant_label], opcode.opmap[base_label])
        for base_label, variant_labels in opcode._specializations.items()
        # Skip families whose base opcode is not in opmap.
        if base_label in opcode.opmap
        for variant_label in variant_labels
        # Skip variants that have no number in the specialized opmap.
        if variant_label in opcode._specialized_opmap
    )
 def get_opcode_info(opcode_num):
    """Describe an opcode number: its name, its base name, and whether it is specialized.
    Args:
        opcode_num: The opcode number to look up in the module's name table
    Returns:
        A dict with keys:
        - 'opname': The name recorded for ``opcode_num``, or ``'<N>'`` when
          the number is not in the name table
        - 'base_opname': The name of the base opcode when ``opcode_num`` has
          an entry in the deopt table; otherwise the same value as 'opname'
        - 'is_specialized': True only when the deopt table has an entry
          for ``opcode_num``
    """
    label = _OPCODE_NAMES.get(opcode_num)
    if label is None:
        # Unknown number: use one placeholder for both name fields.
        placeholder = f'<{opcode_num}>'
        return dict(
            opname=placeholder,
            base_opname=placeholder,
            is_specialized=False,
        )
    base_num = _DEOPT_MAP.get(opcode_num)
    specialized = base_num is not None
    if specialized:
        # The base number may itself be missing from the name table.
        base_label = _OPCODE_NAMES.get(base_num, f'<{base_num}>')
    else:
        base_label = label
    return dict(
        opname=label,
        base_opname=base_label,
        is_specialized=specialized,
    )
 def format_opcode(opcode_num):
    """Render an opcode number as a display string.
    Args:
        opcode_num: The opcode number, passed straight to get_opcode_info()
    Returns:
        The opcode name alone (e.g. 'LOAD_ATTR') when the opcode is not
        specialized, or 'NAME (BASE_NAME)' (e.g.
        'LOAD_ATTR_INSTANCE_VALUE (LOAD_ATTR)') when it is
    """
    details = get_opcode_info(opcode_num)
    label = details['opname']
    if not details['is_specialized']:
        return label
    # Specialized: append the base opcode name in parentheses.
    return f"{label} ({details['base_opname']})"
 def get_opcode_mapping():
    """Bundle the opcode name and deopt tables for JavaScript consumption.
    Returns:
        A dict with keys:
        - 'names': The module's table mapping opcode numbers to opcode names
        - 'deopt': The module's table mapping specialized opcode numbers to
          base opcode numbers
        Both values are the module-level table objects themselves, not copies.
    """
    return dict(names=_OPCODE_NAMES, deopt=_DEOPT_MAP)
--- a/Lib/profiling/sampling/sample.py
+++ b/Lib/profiling/sampling/sample.py
@ -27,7 +27,7 @@
 class SampleProfiler:
-    def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True):
+    def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, opcodes=False, skip_non_matching_threads=True):
        self.pid = pid
        self.sample_interval_usec = sample_interval_usec
        self.all_threads = all_threads
@ -35,13 +35,13 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD
        if _FREE_THREADED_BUILD:
            self.unwinder = _remote_debugging.RemoteUnwinder(
                self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
-                skip_non_matching_threads=skip_non_matching_threads
+                opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads
            )
        else:
            only_active_threads = bool(self.all_threads)
            self.unwinder = _remote_debugging.RemoteUnwinder(
                self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
-                skip_non_matching_threads=skip_non_matching_threads
+                opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads
            )
        # Track sample intervals and total sample count
        self.sample_intervals = deque(maxlen=100)
@ -193,6 +193,7 @@ def sample(
    mode=PROFILING_MODE_WALL,
    native=False,
    gc=True,
    opcodes=False,
 ):
    """Sample a process using the provided collector.
@ -206,6 +207,7 @@ def sample(
              GIL (only when holding GIL), ALL (includes GIL and CPU status)
        native: Whether to include native frames
        gc: Whether to include GC frames
        opcodes: Whether to include opcode information
    Returns:
        The collector with collected samples
@ -228,6 +230,7 @@ def sample(
        mode=mode,
        native=native,
        gc=gc,
        opcodes=opcodes,
        skip_non_matching_threads=skip_non_matching_threads
    )
    profiler.realtime_stats = realtime_stats
@ -248,6 +251,7 @@ def sample_live(
    mode=PROFILING_MODE_WALL,
    native=False,
    gc=True,
    opcodes=False,
 ):
    """Sample a process in live/interactive mode with curses TUI.
@ -261,6 +265,7 @@ def sample_live(
              GIL (only when holding GIL), ALL (includes GIL and CPU status)
        native: Whether to include native frames
        gc: Whether to include GC frames
        opcodes: Whether to include opcode information
    Returns:
        The collector with collected samples
@ -283,6 +288,7 @@ def sample_live(
        mode=mode,
        native=native,
        gc=gc,
        opcodes=opcodes,
        skip_non_matching_threads=skip_non_matching_threads
    )
    profiler.realtime_stats = realtime_stats