cpython/Lib/profiling/sampling/cli.py

705 lines
20 KiB
Python

"""Command-line interface for the sampling profiler."""
import argparse
import os
import socket
import subprocess
import sys
from .sample import sample, sample_live
from .pstats_collector import PstatsCollector
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
from .gecko_collector import GeckoCollector
from .constants import (
PROFILING_MODE_ALL,
PROFILING_MODE_WALL,
PROFILING_MODE_CPU,
PROFILING_MODE_GIL,
SORT_MODE_NSAMPLES,
SORT_MODE_TOTTIME,
SORT_MODE_CUMTIME,
SORT_MODE_SAMPLE_PCT,
SORT_MODE_CUMUL_PCT,
SORT_MODE_NSAMPLES_CUMUL,
)
try:
from .live_collector import LiveStatsCollector
except ImportError:
LiveStatsCollector = None
class CustomFormatter(
    argparse.ArgumentDefaultsHelpFormatter,
    argparse.RawDescriptionHelpFormatter,
):
    """Help formatter mixing two stock argparse behaviours.

    Option help gets the default value appended, and description text is
    emitted as written rather than being re-wrapped.
    """
# Description text for the top-level parser. main() passes it as
# ``description=`` together with CustomFormatter, which derives from
# argparse.RawDescriptionHelpFormatter, so the line breaks below are
# significant at runtime.
_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data.
Commands:
run Run and profile a script or module
attach Attach to and profile a running process
Examples:
# Run and profile a script
python -m profiling.sampling run script.py arg1 arg2
# Attach to a running process
python -m profiling.sampling attach 1234
# Live interactive mode for a script
python -m profiling.sampling run --live script.py
# Live interactive mode for a running process
python -m profiling.sampling attach --live 1234
Use 'python -m profiling.sampling <command> --help' for command-specific help."""
# --- Synchronization with the profiled child process ---
# Timeout (seconds) applied to the listening sync socket.
_SYNC_TIMEOUT = 5.0
# Seconds to wait after terminate() before falling back to kill().
_PROCESS_KILL_TIMEOUT = 2.0
# Exact payload the child must send to report readiness.
_READY_MESSAGE = b"ready"
# Upper bound on the number of bytes read from the sync connection.
_RECV_BUFFER_SIZE = 1024

# --- Output formats ---
# File extension used when an output filename is generated for a format.
FORMAT_EXTENSIONS = dict(
    pstats="pstats",
    collapsed="txt",
    flamegraph="html",
    gecko="json",
)
# Collector class used for each supported output format name.
COLLECTOR_MAP = dict(
    pstats=PstatsCollector,
    collapsed=CollapsedStackCollector,
    flamegraph=FlamegraphCollector,
    gecko=GeckoCollector,
)
def _parse_mode(mode_string):
    """Translate a ``--mode`` value into its PROFILING_MODE_* constant.

    Args:
        mode_string: One of "wall", "cpu" or "gil".

    Returns:
        The matching profiling mode constant.

    Raises:
        KeyError: If mode_string is not one of the names above.
    """
    constants_by_name = dict(
        wall=PROFILING_MODE_WALL,
        cpu=PROFILING_MODE_CPU,
        gil=PROFILING_MODE_GIL,
    )
    return constants_by_name[mode_string]
def _terminate_process(process):
    """Stop *process* if it is still running: terminate, then kill on timeout."""
    if process.poll() is not None:
        return
    process.terminate()
    try:
        process.wait(timeout=_PROCESS_KILL_TIMEOUT)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def _run_with_sync(original_cmd, suppress_output=False):
    """Run a command with socket-based synchronization and return the process.

    A listening TCP socket is opened on 127.0.0.1, the target is launched
    through ``profiling.sampling._sync_coordinator`` (which is given the port
    and the current working directory), and this function blocks until the
    child connects and sends the ready message.

    Args:
        original_cmd: Command sequence whose first element is the Python
            executable; the remaining elements are forwarded to the
            coordinator.
        suppress_output: When true, the child's stdin, stdout and stderr are
            redirected to DEVNULL (used by live mode).

    Returns:
        The started ``subprocess.Popen`` object.

    Raises:
        RuntimeError: If the child does not signal readiness within the
            timeout, or sends an unexpected payload.

    On any failure after the child has been spawned, the child is stopped
    before the exception propagates, so no orphan process is left behind.
    """
    # Create a TCP socket for synchronization with better socket options
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock:
        # Set SO_REUSEADDR to avoid "Address already in use" errors
        sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sync_sock.bind(("127.0.0.1", 0))  # Let OS choose a free port
        sync_port = sync_sock.getsockname()[1]
        sync_sock.listen(1)
        sync_sock.settimeout(_SYNC_TIMEOUT)

        # Get current working directory to preserve it
        cwd = os.getcwd()

        # Build command using the sync coordinator
        target_args = original_cmd[1:]  # Remove python executable
        cmd = (
            sys.executable,
            "-m",
            "profiling.sampling._sync_coordinator",
            str(sync_port),
            cwd,
        ) + tuple(target_args)

        # Start the process with coordinator
        # Suppress stdout/stderr if requested (for live mode)
        popen_kwargs = {}
        if suppress_output:
            popen_kwargs["stdin"] = subprocess.DEVNULL
            popen_kwargs["stdout"] = subprocess.DEVNULL
            popen_kwargs["stderr"] = subprocess.DEVNULL

        process = subprocess.Popen(cmd, **popen_kwargs)

        try:
            # Wait for ready signal with timeout
            with sync_sock.accept()[0] as conn:
                # The socket returned by accept() is in blocking mode even
                # though the listener has a timeout, so set it explicitly;
                # otherwise recv() could hang forever on a child that
                # connects but never sends.
                conn.settimeout(_SYNC_TIMEOUT)
                ready_signal = conn.recv(_RECV_BUFFER_SIZE)

            if ready_signal != _READY_MESSAGE:
                raise RuntimeError(
                    f"Invalid ready signal received: {ready_signal!r}"
                )
        except socket.timeout:
            # If we timeout, kill the process and raise an error
            _terminate_process(process)
            raise RuntimeError(
                "Process failed to signal readiness within timeout"
            )
        except BaseException:
            # Any other failure (bad ready signal, socket error, Ctrl-C)
            # must not leave the child running.
            _terminate_process(process)
            raise

        return process
def _add_sampling_options(parser):
    """Register the "Sampling configuration" option group on *parser*.

    Adds -i/--interval, -d/--duration, -a/--all-threads, --realtime-stats,
    --native and --no-gc (stored as ``gc``, true unless the flag is given).
    """
    group = parser.add_argument_group("Sampling configuration")

    # Integer-valued options: (short flag, long flag, default, metavar, help).
    integer_options = (
        ("-i", "--interval", 100, "MICROSECONDS", "sampling interval"),
        ("-d", "--duration", 10, "SECONDS", "Sampling duration"),
    )
    for short_flag, long_flag, default, metavar, help_text in integer_options:
        group.add_argument(
            short_flag,
            long_flag,
            type=int,
            default=default,
            metavar=metavar,
            help=help_text,
        )

    group.add_argument(
        "-a",
        "--all-threads",
        action="store_true",
        help="Sample all threads in the process instead of just the main thread",
    )

    # Long-only boolean switches that default to False.
    switches = (
        (
            "--realtime-stats",
            "Print real-time sampling statistics (Hz, mean, min, max) during profiling",
        ),
        (
            "--native",
            'Include artificial "<native>" frames to denote calls to non-Python code',
        ),
    )
    for flag, help_text in switches:
        group.add_argument(flag, action="store_true", help=help_text)

    # Inverted switch: args.gc is True unless --no-gc is passed.
    group.add_argument(
        "--no-gc",
        action="store_false",
        dest="gc",
        help='Don\'t include artificial "<GC>" frames to denote active garbage collection',
    )
def _add_mode_options(parser):
    """Register the "Mode options" group (the ``--mode`` choice) on *parser*."""
    mode_help = (
        "Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), "
        "gil (only samples when thread holds the GIL)"
    )
    group = parser.add_argument_group("Mode options")
    group.add_argument(
        "--mode",
        choices=["wall", "cpu", "gil"],
        default="wall",
        help=mode_help,
    )
def _add_format_options(parser):
    """Register the "Output options" group on *parser*.

    The format flags (--pstats, --collapsed, --flamegraph, --gecko) are
    mutually exclusive and all store into ``format``, which defaults to
    "pstats". -o/--output stores the destination in ``outfile``.
    """
    group = parser.add_argument_group("Output options")
    exclusive_formats = group.add_mutually_exclusive_group()

    # (format name, help text); the flag is "--<name>" and stores <name>.
    format_flags = (
        ("pstats", "Generate pstats output (default)"),
        ("collapsed", "Generate collapsed stack traces for flamegraphs"),
        ("flamegraph", "Generate interactive HTML flamegraph visualization"),
        ("gecko", "Generate Gecko format for Firefox Profiler"),
    )
    for format_name, help_text in format_flags:
        exclusive_formats.add_argument(
            f"--{format_name}",
            action="store_const",
            const=format_name,
            dest="format",
            help=help_text,
        )
    parser.set_defaults(format="pstats")

    group.add_argument(
        "-o",
        "--output",
        dest="outfile",
        help="Save output to a file (default: stdout for pstats, "
        "auto-generated filename for other formats)",
    )
def _add_pstats_options(parser):
    """Register the "pstats format options" group on *parser*.

    ``--sort`` and ``--limit`` default to None so that callers can tell
    whether the user passed them explicitly.
    """
    sort_keys = [
        "nsamples",
        "tottime",
        "cumtime",
        "sample-pct",
        "cumul-pct",
        "nsamples-cumul",
        "name",
    ]
    group = parser.add_argument_group("pstats format options")
    group.add_argument(
        "--sort",
        choices=sort_keys,
        default=None,
        help="Sort order for pstats output (default: nsamples)",
    )
    group.add_argument(
        "-l",
        "--limit",
        type=int,
        default=None,
        help="Limit the number of rows in the output (default: 15)",
    )
    group.add_argument(
        "--no-summary",
        action="store_true",
        help="Disable the summary section in the pstats output",
    )
def _sort_to_mode(sort_choice):
    """Map a ``--sort`` choice to its sort-mode value.

    Unrecognised choices fall back to SORT_MODE_NSAMPLES. The "name" choice
    maps to the literal -1 rather than to a SORT_MODE_* constant.
    """
    modes_by_choice = {
        "nsamples": SORT_MODE_NSAMPLES,
        "tottime": SORT_MODE_TOTTIME,
        "cumtime": SORT_MODE_CUMTIME,
        "sample-pct": SORT_MODE_SAMPLE_PCT,
        "cumul-pct": SORT_MODE_CUMUL_PCT,
        "nsamples-cumul": SORT_MODE_NSAMPLES_CUMUL,
        "name": -1,
    }
    try:
        return modes_by_choice[sort_choice]
    except KeyError:
        return SORT_MODE_NSAMPLES
def _create_collector(format_type, interval, skip_idle):
    """Instantiate the collector registered for an output format.

    Args:
        format_type: Output format name; must be a key of COLLECTOR_MAP.
        interval: Sampling interval, passed positionally to the collector.
        skip_idle: Whether idle samples should be skipped. Ignored (forced
            to False) for the "gecko" format.

    Returns:
        A new collector instance.

    Raises:
        ValueError: If format_type has no registered collector.
    """
    if COLLECTOR_MAP.get(format_type) is None:
        raise ValueError(f"Unknown format: {format_type}")
    collector_cls = COLLECTOR_MAP[format_type]

    # Gecko format never skips idle (it needs both GIL and CPU data)
    effective_skip_idle = False if format_type == "gecko" else skip_idle
    return collector_cls(interval, skip_idle=effective_skip_idle)
def _generate_output_filename(format_type, pid):
    """Build the default output filename ``<format>.<pid>.<extension>``.

    Args:
        format_type: Output format name.
        pid: Process ID embedded in the filename.

    Returns:
        The filename; the extension comes from FORMAT_EXTENSIONS and falls
        back to "txt" for formats not listed there.
    """
    suffix = FORMAT_EXTENSIONS.get(format_type, "txt")
    return "{}.{}.{}".format(format_type, pid, suffix)
def _handle_output(collector, args, pid, mode):
    """Write or print the collected profile according to the CLI arguments.

    Args:
        collector: Collector holding the profiling data.
        args: Parsed command-line arguments (format, outfile, sort, limit,
            no_summary are read).
        pid: Process ID, used only when a filename has to be generated.
        mode: Profiling mode, forwarded to ``print_stats``.

    Non-pstats formats are always exported to a file. pstats is exported
    only when an output file was given; otherwise it is printed.
    """
    if args.format != "pstats":
        # Export to file, generating a name when none was supplied.
        destination = args.outfile or _generate_output_filename(
            args.format, pid
        )
        collector.export(destination)
        return

    if args.outfile:
        collector.export(args.outfile)
        return

    # Print to stdout with defaults applied
    chosen_sort = "nsamples" if args.sort is None else args.sort
    row_limit = 15 if args.limit is None else args.limit
    sort_mode = _sort_to_mode(chosen_sort)
    show_summary = not args.no_summary
    collector.print_stats(sort_mode, row_limit, show_summary, mode)
def _validate_args(args, parser):
    """Reject option combinations that make no sense together.

    Args:
        args: Parsed command-line arguments.
        parser: ArgumentParser whose ``error`` method reports problems.

    Checks, in order: live mode needs LiveStatsCollector to be importable;
    live mode accepts neither a non-pstats format nor pstats display
    options; gecko accepts only the default "wall" mode; pstats display
    options require the pstats format.
    """

    def pstats_only_flags_given():
        # Names of the pstats display options the user passed explicitly.
        candidates = (
            ("--sort", args.sort is not None),
            ("--limit", args.limit is not None),
            ("--no-summary", args.no_summary),
        )
        return [flag for flag, given in candidates if given]

    live_requested = hasattr(args, 'live') and args.live

    if live_requested:
        # Check if live mode is available
        if LiveStatsCollector is None:
            parser.error(
                "Live mode requires the curses module, which is not available."
            )
        # Live mode is incompatible with format options
        if args.format != "pstats":
            parser.error(
                f"--live is incompatible with --{args.format}. Live mode uses a TUI interface."
            )
        # Live mode is also incompatible with pstats-specific options
        offending = pstats_only_flags_given()
        if offending:
            parser.error(
                f"Options {', '.join(offending)} are incompatible with --live. "
                "Live mode uses a TUI interface with its own controls."
            )
        return

    # Validate gecko mode doesn't use non-wall mode
    if args.format == "gecko" and args.mode != "wall":
        parser.error(
            "--mode option is incompatible with --gecko. "
            "Gecko format automatically includes both GIL-holding and CPU status analysis."
        )

    # Validate pstats-specific options are only used with pstats format
    if args.format == "pstats":
        return
    offending = pstats_only_flags_given()
    if offending:
        parser.error(
            f"Options {', '.join(offending)} are only valid with --pstats, not --{args.format}"
        )
def main():
    """Parse the command line and dispatch to the ``run`` or ``attach`` handler.

    Builds a parser with two required subcommands, validates the parsed
    arguments with _validate_args, then calls _handle_run or _handle_attach.
    """
    live_help = "Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)"
    run_description = """Run and profile a Python script or module
Examples:
# Run and profile a module
python -m profiling.sampling run -m mymodule arg1 arg2
# Generate flamegraph from a script
python -m profiling.sampling run --flamegraph -o output.html script.py
# Profile with custom interval and duration
python -m profiling.sampling run -i 50 -d 30 script.py
# Save collapsed stacks to file
python -m profiling.sampling run --collapsed -o stacks.txt script.py
# Live interactive mode for a script
python -m profiling.sampling run --live script.py"""
    attach_description = """Attach to a running process and profile it
Examples:
# Profile all threads, sort by total time
python -m profiling.sampling attach -a --sort tottime 1234
# Live interactive mode for a running process
python -m profiling.sampling attach --live 1234"""

    # Top-level parser and its subcommand container
    parser = argparse.ArgumentParser(
        description=_HELP_DESCRIPTION,
        formatter_class=CustomFormatter,
    )
    subparsers = parser.add_subparsers(
        dest="command", required=True, help="Command to run"
    )

    # "run": launch a script or module and profile it
    run_parser = subparsers.add_parser(
        "run",
        help="Run and profile a script or module",
        formatter_class=CustomFormatter,
        description=run_description,
    )
    run_parser.add_argument(
        "-m",
        "--module",
        action="store_true",
        help="Run target as a module (like python -m)",
    )
    run_parser.add_argument(
        "target",
        help="Script file or module name to profile",
    )
    run_parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to the script or module",
    )

    # "attach": profile an already running process
    attach_parser = subparsers.add_parser(
        "attach",
        help="Attach to and profile a running process",
        formatter_class=CustomFormatter,
        description=attach_description,
    )
    attach_parser.add_argument(
        "pid",
        type=int,
        help="Process ID to attach to",
    )

    # Options shared by both subcommands, registered in the same order on each
    for command_parser in (run_parser, attach_parser):
        command_parser.add_argument(
            "--live",
            action="store_true",
            help=live_help,
        )
        _add_sampling_options(command_parser)
        _add_mode_options(command_parser)
        _add_format_options(command_parser)
        _add_pstats_options(command_parser)

    args = parser.parse_args()
    _validate_args(args, parser)

    # Dispatch on the chosen subcommand
    if args.command == "run":
        _handle_run(args)
    elif args.command == "attach":
        _handle_attach(args)
    else:
        parser.error(f"Unknown command: {args.command}")
def _handle_attach(args):
    """Profile the already running process ``args.pid`` and emit the result.

    Delegates to _handle_live_attach when ``--live`` was given; otherwise
    samples the process with a collector matching ``args.format`` and passes
    the result to _handle_output.
    """
    if args.live:
        _handle_live_attach(args, args.pid)
        return

    # Use PROFILING_MODE_ALL for gecko format
    if args.format == "gecko":
        mode = PROFILING_MODE_ALL
    else:
        mode = _parse_mode(args.mode)

    # Idle samples are kept for the wall and "all" modes, skipped otherwise
    skip_idle = mode != PROFILING_MODE_ALL and mode != PROFILING_MODE_WALL

    collector = _create_collector(args.format, args.interval, skip_idle)

    sampling_options = dict(
        duration_sec=args.duration,
        all_threads=args.all_threads,
        realtime_stats=args.realtime_stats,
        mode=mode,
        native=args.native,
        gc=args.gc,
    )
    profiled = sample(args.pid, collector, **sampling_options)

    _handle_output(profiled, args, args.pid, mode)
def _handle_run(args):
    """Handle the 'run' command.

    Launches the target script or module through _run_with_sync, samples it
    with a collector matching ``args.format``, and passes the result to
    _handle_output. Delegates to _handle_live_run when ``--live`` was given.

    Everything that happens after the child process has been started runs
    inside a single try/finally, so the child is stopped even if creating
    the collector (not just sampling) fails.
    """
    # Check if live mode is requested
    if args.live:
        _handle_live_run(args)
        return

    # Build the command to run
    if args.module:
        cmd = (sys.executable, "-m", args.target, *args.args)
    else:
        cmd = (sys.executable, args.target, *args.args)

    # Run with synchronization
    process = _run_with_sync(cmd, suppress_output=False)

    try:
        # Use PROFILING_MODE_ALL for gecko format
        mode = (
            PROFILING_MODE_ALL
            if args.format == "gecko"
            else _parse_mode(args.mode)
        )

        # Idle samples are kept for the wall and "all" modes, skipped otherwise
        skip_idle = mode not in (PROFILING_MODE_WALL, PROFILING_MODE_ALL)

        # Create the appropriate collector
        collector = _create_collector(args.format, args.interval, skip_idle)

        # Profile the subprocess
        collector = sample(
            process.pid,
            collector,
            duration_sec=args.duration,
            all_threads=args.all_threads,
            realtime_stats=args.realtime_stats,
            mode=mode,
            native=args.native,
            gc=args.gc,
        )

        # Handle output
        _handle_output(collector, args, process.pid, mode)
    finally:
        # Clean up the subprocess: terminate first, kill if it does not
        # exit within the grace period.
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=_PROCESS_KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
def _handle_live_attach(args, pid):
    """Run the live (TUI) profiler against the existing process *pid*.

    Args:
        args: Parsed command-line arguments.
        pid: Process ID to sample.
    """
    mode = _parse_mode(args.mode)

    # Only wall mode keeps idle samples.
    collector = LiveStatsCollector(
        args.interval,
        skip_idle=(mode != PROFILING_MODE_WALL),
        sort_by="tottime",  # initial sort column
        limit=20,  # initial row limit
        pid=pid,
        mode=mode,
    )

    live_options = dict(
        duration_sec=args.duration,
        all_threads=args.all_threads,
        realtime_stats=args.realtime_stats,
        mode=mode,
        native=args.native,
        gc=args.gc,
    )
    sample_live(pid, collector, **live_options)
def _handle_live_run(args):
    """Handle live mode for running a script/module.

    Launches the target through _run_with_sync with its output suppressed,
    then samples it in live mode with a LiveStatsCollector.

    Everything that happens after the child process has been started runs
    inside a single try/finally, so the child is stopped even if building
    the live collector (not just sampling) fails.
    """
    # Build the command to run
    if args.module:
        cmd = (sys.executable, "-m", args.target, *args.args)
    else:
        cmd = (sys.executable, args.target, *args.args)

    # Run with synchronization, suppressing output for live mode
    process = _run_with_sync(cmd, suppress_output=True)

    try:
        mode = _parse_mode(args.mode)

        # Determine skip_idle based on mode
        skip_idle = mode != PROFILING_MODE_WALL

        # Create live collector with default settings
        collector = LiveStatsCollector(
            args.interval,
            skip_idle=skip_idle,
            sort_by="tottime",  # Default initial sort
            limit=20,  # Default limit
            pid=process.pid,
            mode=mode,
        )

        # Profile the subprocess in live mode
        sample_live(
            process.pid,
            collector,
            duration_sec=args.duration,
            all_threads=args.all_threads,
            realtime_stats=args.realtime_stats,
            mode=mode,
            native=args.native,
            gc=args.gc,
        )
    finally:
        # Clean up the subprocess: terminate first, kill if it does not
        # exit within the grace period.
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=_PROCESS_KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()