diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py index 5c0e39d7737..2de132f3ec1 100644 --- a/Lib/profiling/sampling/cli.py +++ b/Lib/profiling/sampling/cli.py @@ -195,6 +195,12 @@ def _add_sampling_options(parser): dest="gc", help='Don\'t include artificial "" frames to denote active garbage collection', ) + sampling_group.add_argument( + "--opcodes", + action="store_true", + help="Gather bytecode opcode information for instruction-level profiling " + "(shows which bytecode instructions are executing, including specializations).", + ) def _add_mode_options(parser): @@ -304,13 +310,15 @@ def _sort_to_mode(sort_choice): return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) -def _create_collector(format_type, interval, skip_idle): +def _create_collector(format_type, interval, skip_idle, opcodes=False): """Create the appropriate collector based on format type. Args: - format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko') + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap') interval: Sampling interval in microseconds skip_idle: Whether to skip idle samples + opcodes: Whether to collect opcode information (only used by gecko format + for creating interval markers in Firefox Profiler) Returns: A collector instance of the appropriate type @@ -320,8 +328,10 @@ def _create_collector(format_type, interval, skip_idle): raise ValueError(f"Unknown format: {format_type}") # Gecko format never skips idle (it needs both GIL and CPU data) + # and is the only format that uses opcodes for interval markers if format_type == "gecko": skip_idle = False + return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes) return collector_class(interval, skip_idle=skip_idle) @@ -413,6 +423,13 @@ def _validate_args(args, parser): "Gecko format automatically includes both GIL-holding and CPU status analysis." 
) + # Validate --opcodes is only used with compatible formats + opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap") + if args.opcodes and args.format not in opcodes_compatible_formats: + parser.error( + f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}." + ) + # Validate pstats-specific options are only used with pstats format if args.format != "pstats": issues = [] @@ -560,7 +577,7 @@ def _handle_attach(args): ) # Create the appropriate collector - collector = _create_collector(args.format, args.interval, skip_idle) + collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) # Sample the process collector = sample( @@ -572,6 +589,7 @@ def _handle_attach(args): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) # Handle output @@ -607,7 +625,7 @@ def _handle_run(args): ) # Create the appropriate collector - collector = _create_collector(args.format, args.interval, skip_idle) + collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes) # Profile the subprocess try: @@ -620,6 +638,7 @@ def _handle_run(args): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) # Handle output @@ -650,6 +669,7 @@ def _handle_live_attach(args, pid): limit=20, # Default limit pid=pid, mode=mode, + opcodes=args.opcodes, ) # Sample in live mode @@ -662,6 +682,7 @@ def _handle_live_attach(args, pid): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) @@ -689,6 +710,7 @@ def _handle_live_run(args): limit=20, # Default limit pid=process.pid, mode=mode, + opcodes=args.opcodes, ) # Profile the subprocess in live mode @@ -702,6 +724,7 @@ def _handle_live_run(args): mode=mode, native=args.native, gc=args.gc, + opcodes=args.opcodes, ) finally: # Clean up the subprocess diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 6187f351cb5..6dd3a2a288f 100644 --- a/Lib/profiling/sampling/collector.py 
# Location assigned to synthetic frames (native, GC), which carry no source
# position of their own.
# Layout: (lineno, end_lineno, col_offset, end_col_offset)
DEFAULT_LOCATION = (0, 0, -1, -1)


def normalize_location(location):
    """Return a usable location tuple, substituting the default for ``None``.

    Args:
        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
            or None when the frame has no location.

    Returns:
        tuple: ``DEFAULT_LOCATION`` when ``location`` is None; otherwise the
        ``location`` object itself, passed through unchanged.
    """
    return DEFAULT_LOCATION if location is None else location


def extract_lineno(location):
    """Return the line number stored in a location tuple.

    Args:
        location: tuple (lineno, end_lineno, col_offset, end_col_offset),
            or None when the frame has no location.

    Returns:
        int: The first element of ``location``, or 0 when ``location`` is
        None (synthetic frames).
    """
    return 0 if location is None else location[0]
# Opcode helpers for bytecode-level profiler visualization.
#
# Resolves opcode numbers to names and reports whether an opcode is a
# specialized variant of a generic instruction, using the metadata tables
# exposed by the ``opcode`` module.

import opcode


def _build_opcode_names():
    """Return a dict mapping opcode number -> opcode name.

    Starts from ``opcode.opname`` and then overlays the specialized variants
    from ``opcode._specialized_opmap`` when the interpreter exposes it.
    """
    names = dict(enumerate(opcode.opname))
    # An absent table behaves like an empty one: nothing gets overlaid.
    specialized = getattr(opcode, '_specialized_opmap', {})
    for label, number in specialized.items():
        names[number] = label
    return names


def _build_deopt_map():
    """Return a dict mapping specialized opcode number -> base opcode number.

    The map is empty unless the interpreter exposes both
    ``opcode._specializations`` and ``opcode._specialized_opmap``.
    """
    has_tables = (
        hasattr(opcode, '_specializations')
        and hasattr(opcode, '_specialized_opmap')
    )
    if not has_tables:
        return {}

    deopt = {}
    variant_numbers = opcode._specialized_opmap
    for generic_name, family in opcode._specializations.items():
        generic_number = opcode.opmap.get(generic_name)
        if generic_number is None:
            # Base instruction unknown to opmap: skip its whole family.
            continue
        for member_name in family:
            member_number = variant_numbers.get(member_name)
            if member_number is not None:
                deopt[member_number] = generic_number
    return deopt


# opcode number -> opcode name (generic and specialized instructions)
_OPCODE_NAMES = _build_opcode_names()

# specialized opcode number -> base opcode number, so that both the
# specialized form (e.g. LOAD_ATTR_INSTANCE_VALUE) and its generic form
# (e.g. LOAD_ATTR) can be shown
_DEOPT_MAP = _build_deopt_map()


def get_opcode_info(opcode_num):
    """Describe an opcode number: its name, base name and specialization flag.

    Args:
        opcode_num: The opcode number to look up.

    Returns:
        A dict with keys:
            - 'opname': The opcode name (e.g., 'LOAD_ATTR_INSTANCE_VALUE'),
              or '<N>' when the number is not in the name table
            - 'base_opname': The base opcode name (e.g., 'LOAD_ATTR'); equal
              to 'opname' for non-specialized or unknown opcodes
            - 'is_specialized': True if this is a specialized instruction
    """
    resolved = _OPCODE_NAMES.get(opcode_num)
    if resolved is None:
        # Unknown number: use a placeholder for both names.
        resolved = base_label = f'<{opcode_num}>'
        specialized = False
    else:
        generic_number = _DEOPT_MAP.get(opcode_num)
        specialized = generic_number is not None
        if specialized:
            base_label = _OPCODE_NAMES.get(
                generic_number, f'<{generic_number}>'
            )
        else:
            base_label = resolved

    return {
        'opname': resolved,
        'base_opname': base_label,
        'is_specialized': specialized,
    }


def format_opcode(opcode_num):
    """Render an opcode number as a display string.

    Args:
        opcode_num: The opcode number to format.

    Returns:
        The opcode name alone (e.g. 'LOAD_ATTR') for a non-specialized
        opcode, or 'NAME (BASE_NAME)' such as
        'LOAD_ATTR_INSTANCE_VALUE (LOAD_ATTR)' for a specialized one.
    """
    details = get_opcode_info(opcode_num)
    label = details['opname']
    if not details['is_specialized']:
        return label
    return f"{label} ({details['base_opname']})"


def get_opcode_mapping():
    """Expose the opcode tables for JavaScript consumption.

    Returns:
        A dict with keys:
            - 'names': Dict mapping opcode numbers to opcode names
            - 'deopt': Dict mapping specialized opcode numbers to base
              opcode numbers

        The values are the module-level tables themselves, not copies.
    """
    return dict(names=_OPCODE_NAMES, deopt=_DEOPT_MAP)
@@ -206,6 +207,7 @@ def sample( GIL (only when holding GIL), ALL (includes GIL and CPU status) native: Whether to include native frames gc: Whether to include GC frames + opcodes: Whether to include opcode information Returns: The collector with collected samples @@ -228,6 +230,7 @@ def sample( mode=mode, native=native, gc=gc, + opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats @@ -248,6 +251,7 @@ def sample_live( mode=PROFILING_MODE_WALL, native=False, gc=True, + opcodes=False, ): """Sample a process in live/interactive mode with curses TUI. @@ -261,6 +265,7 @@ def sample_live( GIL (only when holding GIL), ALL (includes GIL and CPU status) native: Whether to include native frames gc: Whether to include GC frames + opcodes: Whether to include opcode information Returns: The collector with collected samples @@ -283,6 +288,7 @@ def sample_live( mode=mode, native=native, gc=gc, + opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats