mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
Add opcode utilities and --opcodes CLI flag
New opcode_utils.py maps opcode numbers to names and detects specialized variants using opcode module metadata. Adds normalize_location() and extract_lineno() helpers to collector base for uniform location handling. CLI gains --opcodes flag, validated against compatible formats (gecko, flamegraph, heatmap, live).
This commit is contained in:
parent
dd27e5e679
commit
70f2ae025f
5 changed files with 161 additions and 7 deletions
|
|
@ -195,6 +195,12 @@ def _add_sampling_options(parser):
|
||||||
dest="gc",
|
dest="gc",
|
||||||
help='Don\'t include artificial "<GC>" frames to denote active garbage collection',
|
help='Don\'t include artificial "<GC>" frames to denote active garbage collection',
|
||||||
)
|
)
|
||||||
|
sampling_group.add_argument(
|
||||||
|
"--opcodes",
|
||||||
|
action="store_true",
|
||||||
|
help="Gather bytecode opcode information for instruction-level profiling "
|
||||||
|
"(shows which bytecode instructions are executing, including specializations).",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _add_mode_options(parser):
|
def _add_mode_options(parser):
|
||||||
|
|
@ -304,13 +310,15 @@ def _sort_to_mode(sort_choice):
|
||||||
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
|
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
|
||||||
|
|
||||||
|
|
||||||
def _create_collector(format_type, interval, skip_idle):
|
def _create_collector(format_type, interval, skip_idle, opcodes=False):
|
||||||
"""Create the appropriate collector based on format type.
|
"""Create the appropriate collector based on format type.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko')
|
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
|
||||||
interval: Sampling interval in microseconds
|
interval: Sampling interval in microseconds
|
||||||
skip_idle: Whether to skip idle samples
|
skip_idle: Whether to skip idle samples
|
||||||
|
opcodes: Whether to collect opcode information (only used by gecko format
|
||||||
|
for creating interval markers in Firefox Profiler)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A collector instance of the appropriate type
|
A collector instance of the appropriate type
|
||||||
|
|
@ -320,8 +328,10 @@ def _create_collector(format_type, interval, skip_idle):
|
||||||
raise ValueError(f"Unknown format: {format_type}")
|
raise ValueError(f"Unknown format: {format_type}")
|
||||||
|
|
||||||
# Gecko format never skips idle (it needs both GIL and CPU data)
|
# Gecko format never skips idle (it needs both GIL and CPU data)
|
||||||
|
# and is the only format that uses opcodes for interval markers
|
||||||
if format_type == "gecko":
|
if format_type == "gecko":
|
||||||
skip_idle = False
|
skip_idle = False
|
||||||
|
return collector_class(interval, skip_idle=skip_idle, opcodes=opcodes)
|
||||||
|
|
||||||
return collector_class(interval, skip_idle=skip_idle)
|
return collector_class(interval, skip_idle=skip_idle)
|
||||||
|
|
||||||
|
|
@ -413,6 +423,13 @@ def _validate_args(args, parser):
|
||||||
"Gecko format automatically includes both GIL-holding and CPU status analysis."
|
"Gecko format automatically includes both GIL-holding and CPU status analysis."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Validate --opcodes is only used with compatible formats
|
||||||
|
opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap")
|
||||||
|
if args.opcodes and args.format not in opcodes_compatible_formats:
|
||||||
|
parser.error(
|
||||||
|
f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}."
|
||||||
|
)
|
||||||
|
|
||||||
# Validate pstats-specific options are only used with pstats format
|
# Validate pstats-specific options are only used with pstats format
|
||||||
if args.format != "pstats":
|
if args.format != "pstats":
|
||||||
issues = []
|
issues = []
|
||||||
|
|
@ -560,7 +577,7 @@ def _handle_attach(args):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create the appropriate collector
|
# Create the appropriate collector
|
||||||
collector = _create_collector(args.format, args.interval, skip_idle)
|
collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
|
||||||
|
|
||||||
# Sample the process
|
# Sample the process
|
||||||
collector = sample(
|
collector = sample(
|
||||||
|
|
@ -572,6 +589,7 @@ def _handle_attach(args):
|
||||||
mode=mode,
|
mode=mode,
|
||||||
native=args.native,
|
native=args.native,
|
||||||
gc=args.gc,
|
gc=args.gc,
|
||||||
|
opcodes=args.opcodes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle output
|
# Handle output
|
||||||
|
|
@ -607,7 +625,7 @@ def _handle_run(args):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create the appropriate collector
|
# Create the appropriate collector
|
||||||
collector = _create_collector(args.format, args.interval, skip_idle)
|
collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
|
||||||
|
|
||||||
# Profile the subprocess
|
# Profile the subprocess
|
||||||
try:
|
try:
|
||||||
|
|
@ -620,6 +638,7 @@ def _handle_run(args):
|
||||||
mode=mode,
|
mode=mode,
|
||||||
native=args.native,
|
native=args.native,
|
||||||
gc=args.gc,
|
gc=args.gc,
|
||||||
|
opcodes=args.opcodes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle output
|
# Handle output
|
||||||
|
|
@ -650,6 +669,7 @@ def _handle_live_attach(args, pid):
|
||||||
limit=20, # Default limit
|
limit=20, # Default limit
|
||||||
pid=pid,
|
pid=pid,
|
||||||
mode=mode,
|
mode=mode,
|
||||||
|
opcodes=args.opcodes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Sample in live mode
|
# Sample in live mode
|
||||||
|
|
@ -662,6 +682,7 @@ def _handle_live_attach(args, pid):
|
||||||
mode=mode,
|
mode=mode,
|
||||||
native=args.native,
|
native=args.native,
|
||||||
gc=args.gc,
|
gc=args.gc,
|
||||||
|
opcodes=args.opcodes,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -689,6 +710,7 @@ def _handle_live_run(args):
|
||||||
limit=20, # Default limit
|
limit=20, # Default limit
|
||||||
pid=process.pid,
|
pid=process.pid,
|
||||||
mode=mode,
|
mode=mode,
|
||||||
|
opcodes=args.opcodes,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Profile the subprocess in live mode
|
# Profile the subprocess in live mode
|
||||||
|
|
@ -702,6 +724,7 @@ def _handle_live_run(args):
|
||||||
mode=mode,
|
mode=mode,
|
||||||
native=args.native,
|
native=args.native,
|
||||||
gc=args.gc,
|
gc=args.gc,
|
||||||
|
opcodes=args.opcodes,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
# Clean up the subprocess
|
# Clean up the subprocess
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,40 @@
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from .constants import (
|
from .constants import (
|
||||||
|
DEFAULT_LOCATION,
|
||||||
THREAD_STATUS_HAS_GIL,
|
THREAD_STATUS_HAS_GIL,
|
||||||
THREAD_STATUS_ON_CPU,
|
THREAD_STATUS_ON_CPU,
|
||||||
THREAD_STATUS_UNKNOWN,
|
THREAD_STATUS_UNKNOWN,
|
||||||
THREAD_STATUS_GIL_REQUESTED,
|
THREAD_STATUS_GIL_REQUESTED,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_location(location):
    """Return *location* unchanged, substituting the default when it is None.

    Args:
        location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None

    Returns:
        tuple: ``DEFAULT_LOCATION`` if *location* is None, otherwise the
        *location* object that was passed in (it is not copied or validated).
    """
    return DEFAULT_LOCATION if location is None else location
|
||||||
|
|
||||||
|
|
||||||
|
def extract_lineno(location):
    """Return the starting line number stored in *location*.

    Args:
        location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None

    Returns:
        int: The first element of *location*, or 0 when *location* is None
        (the value used for synthetic frames).
    """
    return 0 if location is None else location[0]
|
||||||
|
|
||||||
class Collector(ABC):
|
class Collector(ABC):
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def collect(self, stack_frames):
|
def collect(self, stack_frames):
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,10 @@
|
||||||
SORT_MODE_CUMUL_PCT = 4
|
SORT_MODE_CUMUL_PCT = 4
|
||||||
SORT_MODE_NSAMPLES_CUMUL = 5
|
SORT_MODE_NSAMPLES_CUMUL = 5
|
||||||
|
|
||||||
|
# Default location for synthetic frames (native, GC) that have no source location
|
||||||
|
# Format: (lineno, end_lineno, col_offset, end_col_offset)
|
||||||
|
DEFAULT_LOCATION = (0, 0, -1, -1)
|
||||||
|
|
||||||
# Thread status flags
|
# Thread status flags
|
||||||
try:
|
try:
|
||||||
from _remote_debugging import (
|
from _remote_debugging import (
|
||||||
|
|
|
||||||
92
Lib/profiling/sampling/opcode_utils.py
Normal file
92
Lib/profiling/sampling/opcode_utils.py
Normal file
|
|
@ -0,0 +1,92 @@
|
||||||
|
"""Opcode utilities for bytecode-level profiler visualization.
|
||||||
|
|
||||||
|
This module provides utilities to get opcode names and detect specialization
|
||||||
|
status using the opcode module's metadata. Used by heatmap and flamegraph
|
||||||
|
collectors to display which bytecode instructions are executing at each
|
||||||
|
source line, including Python's adaptive specialization optimizations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import opcode
|
||||||
|
|
||||||
|
# Build opcode name mapping: opcode number -> opcode name
# This includes both standard opcodes and specialized variants (Python 3.11+).
# The specialized names are overlaid on top of the generic table, so a number
# present in opcode._specialized_opmap takes the specialized name.
# dict.update() with a generator is used instead of a module-level ``for``
# loop so that no loop temporaries are left behind as public module
# attributes (they would otherwise be exported by ``import *``).
_OPCODE_NAMES = dict(enumerate(opcode.opname))
_OPCODE_NAMES.update(
    (op, name)
    for name, op in getattr(opcode, '_specialized_opmap', {}).items()
)


def _build_deopt_map():
    """Build the mapping: specialized opcode number -> base opcode number.

    Python 3.11+ uses adaptive specialization where generic opcodes like
    LOAD_ATTR can be replaced at runtime with specialized variants like
    LOAD_ATTR_INSTANCE_VALUE. This mapping lets us show both forms.

    Returns:
        dict: Empty when the opcode module lacks either ``_specializations``
        or ``_specialized_opmap``; otherwise one entry per variant whose
        base name is in ``opcode.opmap`` and whose own name is in
        ``opcode._specialized_opmap``.
    """
    if not (hasattr(opcode, '_specializations')
            and hasattr(opcode, '_specialized_opmap')):
        return {}

    specialized_opmap = opcode._specialized_opmap
    deopt = {}
    for base_name, variant_names in opcode._specializations.items():
        base_opcode = opcode.opmap.get(base_name)
        if base_opcode is None:
            # Base instruction has no number in opmap; nothing to map to.
            continue
        for variant_name in variant_names:
            variant_opcode = specialized_opmap.get(variant_name)
            if variant_opcode is not None:
                deopt[variant_opcode] = base_opcode
    return deopt


_DEOPT_MAP = _build_deopt_map()
|
||||||
|
|
||||||
|
|
||||||
|
def get_opcode_info(opcode_num):
    """Look up the display name and specialization status of an opcode.

    Args:
        opcode_num: The opcode number (0-255 or higher for specialized)

    Returns:
        A dict with keys:
        - 'opname': The opcode name (e.g., 'LOAD_ATTR_INSTANCE_VALUE'),
          or '<N>' when the number is not in the name table
        - 'base_opname': The base opcode name (e.g., 'LOAD_ATTR'); equal to
          'opname' for instructions that are not specialized
        - 'is_specialized': True if this is a specialized instruction
    """
    resolved_name = _OPCODE_NAMES.get(opcode_num)

    # Unknown number: report a placeholder for both names.
    if resolved_name is None:
        placeholder = f'<{opcode_num}>'
        return {
            'opname': placeholder,
            'base_opname': placeholder,
            'is_specialized': False,
        }

    # An entry in the deopt table is what marks an opcode as specialized.
    generic_opcode = _DEOPT_MAP.get(opcode_num)
    specialized = generic_opcode is not None
    if specialized:
        generic_name = _OPCODE_NAMES.get(generic_opcode, f'<{generic_opcode}>')
    else:
        generic_name = resolved_name

    return {
        'opname': resolved_name,
        'base_opname': generic_name,
        'is_specialized': specialized,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def format_opcode(opcode_num):
    """Render an opcode as text, appending the base opcode for specialized ones.

    Args:
        opcode_num: The opcode number (0-255 or higher for specialized)

    Returns:
        A formatted string like 'LOAD_ATTR' or 'LOAD_ATTR_INSTANCE_VALUE (LOAD_ATTR)'
    """
    details = get_opcode_info(opcode_num)
    label = details['opname']
    if not details['is_specialized']:
        return label
    # Specialized instruction: show the generic form in parentheses.
    return f"{label} ({details['base_opname']})"
|
||||||
|
|
||||||
|
|
||||||
|
def get_opcode_mapping():
    """Get opcode name and deopt mappings for JavaScript consumption.

    The returned dicts are shallow copies of the module-level tables, so a
    caller may modify them (for example to re-key them for serialization)
    without affecting later lookups made through get_opcode_info().

    Returns:
        A dict with keys:
        - 'names': Dict mapping opcode numbers to opcode names
        - 'deopt': Dict mapping specialized opcode numbers to base opcode numbers
    """
    return {"names": dict(_OPCODE_NAMES), "deopt": dict(_DEOPT_MAP)}
|
||||||
|
|
@ -27,7 +27,7 @@
|
||||||
|
|
||||||
|
|
||||||
class SampleProfiler:
|
class SampleProfiler:
|
||||||
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True):
|
def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, opcodes=False, skip_non_matching_threads=True):
|
||||||
self.pid = pid
|
self.pid = pid
|
||||||
self.sample_interval_usec = sample_interval_usec
|
self.sample_interval_usec = sample_interval_usec
|
||||||
self.all_threads = all_threads
|
self.all_threads = all_threads
|
||||||
|
|
@ -35,13 +35,13 @@ def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MOD
|
||||||
if _FREE_THREADED_BUILD:
|
if _FREE_THREADED_BUILD:
|
||||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||||
self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
|
self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc,
|
||||||
skip_non_matching_threads=skip_non_matching_threads
|
opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
only_active_threads = bool(self.all_threads)
|
only_active_threads = bool(self.all_threads)
|
||||||
self.unwinder = _remote_debugging.RemoteUnwinder(
|
self.unwinder = _remote_debugging.RemoteUnwinder(
|
||||||
self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
|
self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc,
|
||||||
skip_non_matching_threads=skip_non_matching_threads
|
opcodes=opcodes, skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
# Track sample intervals and total sample count
|
# Track sample intervals and total sample count
|
||||||
self.sample_intervals = deque(maxlen=100)
|
self.sample_intervals = deque(maxlen=100)
|
||||||
|
|
@ -193,6 +193,7 @@ def sample(
|
||||||
mode=PROFILING_MODE_WALL,
|
mode=PROFILING_MODE_WALL,
|
||||||
native=False,
|
native=False,
|
||||||
gc=True,
|
gc=True,
|
||||||
|
opcodes=False,
|
||||||
):
|
):
|
||||||
"""Sample a process using the provided collector.
|
"""Sample a process using the provided collector.
|
||||||
|
|
||||||
|
|
@ -206,6 +207,7 @@ def sample(
|
||||||
GIL (only when holding GIL), ALL (includes GIL and CPU status)
|
GIL (only when holding GIL), ALL (includes GIL and CPU status)
|
||||||
native: Whether to include native frames
|
native: Whether to include native frames
|
||||||
gc: Whether to include GC frames
|
gc: Whether to include GC frames
|
||||||
|
opcodes: Whether to include opcode information
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The collector with collected samples
|
The collector with collected samples
|
||||||
|
|
@ -228,6 +230,7 @@ def sample(
|
||||||
mode=mode,
|
mode=mode,
|
||||||
native=native,
|
native=native,
|
||||||
gc=gc,
|
gc=gc,
|
||||||
|
opcodes=opcodes,
|
||||||
skip_non_matching_threads=skip_non_matching_threads
|
skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
profiler.realtime_stats = realtime_stats
|
profiler.realtime_stats = realtime_stats
|
||||||
|
|
@ -248,6 +251,7 @@ def sample_live(
|
||||||
mode=PROFILING_MODE_WALL,
|
mode=PROFILING_MODE_WALL,
|
||||||
native=False,
|
native=False,
|
||||||
gc=True,
|
gc=True,
|
||||||
|
opcodes=False,
|
||||||
):
|
):
|
||||||
"""Sample a process in live/interactive mode with curses TUI.
|
"""Sample a process in live/interactive mode with curses TUI.
|
||||||
|
|
||||||
|
|
@ -261,6 +265,7 @@ def sample_live(
|
||||||
GIL (only when holding GIL), ALL (includes GIL and CPU status)
|
GIL (only when holding GIL), ALL (includes GIL and CPU status)
|
||||||
native: Whether to include native frames
|
native: Whether to include native frames
|
||||||
gc: Whether to include GC frames
|
gc: Whether to include GC frames
|
||||||
|
opcodes: Whether to include opcode information
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The collector with collected samples
|
The collector with collected samples
|
||||||
|
|
@ -283,6 +288,7 @@ def sample_live(
|
||||||
mode=mode,
|
mode=mode,
|
||||||
native=native,
|
native=native,
|
||||||
gc=gc,
|
gc=gc,
|
||||||
|
opcodes=opcodes,
|
||||||
skip_non_matching_threads=skip_non_matching_threads
|
skip_non_matching_threads=skip_non_matching_threads
|
||||||
)
|
)
|
||||||
profiler.realtime_stats = realtime_stats
|
profiler.realtime_stats = realtime_stats
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue