# Mirror of https://github.com/python/cpython.git (synced 2025-12-08 06:10:17 +00:00); 705 lines, 20 KiB, Python.
"""Command-line interface for the sampling profiler."""
|
|
|
|
import argparse
|
|
import os
|
|
import socket
|
|
import subprocess
|
|
import sys
|
|
|
|
from .sample import sample, sample_live
|
|
from .pstats_collector import PstatsCollector
|
|
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
|
|
from .gecko_collector import GeckoCollector
|
|
from .constants import (
|
|
PROFILING_MODE_ALL,
|
|
PROFILING_MODE_WALL,
|
|
PROFILING_MODE_CPU,
|
|
PROFILING_MODE_GIL,
|
|
SORT_MODE_NSAMPLES,
|
|
SORT_MODE_TOTTIME,
|
|
SORT_MODE_CUMTIME,
|
|
SORT_MODE_SAMPLE_PCT,
|
|
SORT_MODE_CUMUL_PCT,
|
|
SORT_MODE_NSAMPLES_CUMUL,
|
|
)
|
|
|
|
try:
|
|
from .live_collector import LiveStatsCollector
|
|
except ImportError:
|
|
LiveStatsCollector = None
|
|
|
|
|
|
class CustomFormatter(
    argparse.ArgumentDefaultsHelpFormatter,
    argparse.RawDescriptionHelpFormatter,
):
    """Help formatter that shows argument defaults and keeps descriptions raw.

    It adds nothing of its own: the behaviour comes entirely from combining
    the two argparse formatter base classes listed above.
    """
|
|
|
|
|
|
# Top-level description for the CLI parser; it is passed as ``description``
# to argparse.ArgumentParser in main().
_HELP_DESCRIPTION = "\n".join(
    (
        "Sample a process's stack frames and generate profiling data.",
        "",
        "Commands:",
        "run Run and profile a script or module",
        "attach Attach to and profile a running process",
        "",
        "Examples:",
        "# Run and profile a script",
        "python -m profiling.sampling run script.py arg1 arg2",
        "",
        "# Attach to a running process",
        "python -m profiling.sampling attach 1234",
        "",
        "# Live interactive mode for a script",
        "python -m profiling.sampling run --live script.py",
        "",
        "# Live interactive mode for a running process",
        "python -m profiling.sampling attach --live 1234",
        "",
        "Use 'python -m profiling.sampling <command> --help' for command-specific help.",
    )
)
|
|
|
|
|
|
# Constants for socket synchronization

# Timeout value handed to settimeout() on the listening synchronization socket.
_SYNC_TIMEOUT = 5.0
# Timeout passed to Popen.wait() after terminate(), before kill() is used.
_PROCESS_KILL_TIMEOUT = 2.0
# Exact payload the launched process must send to be treated as ready.
_READY_MESSAGE = b"ready"
# Maximum number of bytes read from the synchronization connection.
_RECV_BUFFER_SIZE = 1024
|
|
|
|
# Format configuration

# Output format name -> file extension used when a filename is auto-generated.
FORMAT_EXTENSIONS = {
    "pstats": "pstats",
    "collapsed": "txt",
    "flamegraph": "html",
    "gecko": "json",
}

# Output format name -> collector class instantiated for that format.
COLLECTOR_MAP = {
    "pstats": PstatsCollector,
    "collapsed": CollapsedStackCollector,
    "flamegraph": FlamegraphCollector,
    "gecko": GeckoCollector,
}
|
|
|
|
|
|
def _parse_mode(mode_string):
    """Translate a ``--mode`` value into its PROFILING_MODE_* constant.

    Args:
        mode_string: One of "wall", "cpu" or "gil".

    Returns:
        The PROFILING_MODE_* constant registered for that name.

    Raises:
        KeyError: If ``mode_string`` is not one of the known mode names.
    """
    return {
        "wall": PROFILING_MODE_WALL,
        "cpu": PROFILING_MODE_CPU,
        "gil": PROFILING_MODE_GIL,
    }[mode_string]
|
|
|
|
|
|
def _run_with_sync(original_cmd, suppress_output=False):
    """Run a command with socket-based synchronization and return the process.

    A listening TCP socket is opened on a loopback port chosen by the OS, and
    the target is launched through ``profiling.sampling._sync_coordinator``,
    which is given that port and the current working directory. The function
    then waits for the child to connect and send the ready message.

    Args:
        original_cmd: Command sequence whose first element (the Python
            executable) is dropped; the remaining items are forwarded to the
            coordinator.
        suppress_output: When true, the child's stdin, stdout and stderr are
            redirected to ``subprocess.DEVNULL`` (used by live mode).

    Returns:
        The ``subprocess.Popen`` object of the started process.

    Raises:
        RuntimeError: If the child does not signal readiness within the
            timeout, or if it sends anything other than the ready message.
            In every failure case the child is stopped before the exception
            propagates.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock:
        # Set SO_REUSEADDR to avoid "Address already in use" errors
        sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sync_sock.bind(("127.0.0.1", 0))  # Let OS choose a free port
        sync_port = sync_sock.getsockname()[1]
        sync_sock.listen(1)
        sync_sock.settimeout(_SYNC_TIMEOUT)

        # Get current working directory to preserve it
        cwd = os.getcwd()

        # Build command using the sync coordinator
        target_args = original_cmd[1:]  # Remove python executable
        cmd = (
            sys.executable,
            "-m",
            "profiling.sampling._sync_coordinator",
            str(sync_port),
            cwd,
        ) + tuple(target_args)

        # Suppress stdin/stdout/stderr if requested (for live mode)
        popen_kwargs = {}
        if suppress_output:
            popen_kwargs["stdin"] = subprocess.DEVNULL
            popen_kwargs["stdout"] = subprocess.DEVNULL
            popen_kwargs["stderr"] = subprocess.DEVNULL

        process = subprocess.Popen(cmd, **popen_kwargs)

        def _stop_child():
            """Stop the child if it is still running: terminate, then kill."""
            if process.poll() is not None:
                return
            process.terminate()
            try:
                process.wait(timeout=_PROCESS_KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()

        try:
            # Wait for ready signal with timeout
            with sync_sock.accept()[0] as conn:
                # The accepted socket does not inherit the listener's timeout,
                # so bound the read explicitly; otherwise a child that
                # connects but never sends would block us forever.
                conn.settimeout(_SYNC_TIMEOUT)
                ready_signal = conn.recv(_RECV_BUFFER_SIZE)

            if ready_signal != _READY_MESSAGE:
                raise RuntimeError(
                    f"Invalid ready signal received: {ready_signal!r}"
                )
        except socket.timeout as exc:
            # If we timeout, stop the process and raise an error
            _stop_child()
            raise RuntimeError(
                "Process failed to signal readiness within timeout"
            ) from exc
        except BaseException:
            # Any other failure (bad signal, OS error, interrupt) must not
            # leave an orphaned child behind.
            _stop_child()
            raise

        return process
|
|
|
|
|
|
def _add_sampling_options(parser):
    """Register the "Sampling configuration" argument group on ``parser``.

    Args:
        parser: The argparse parser (or sub-parser) that receives the options
            -i/--interval, -d/--duration, -a/--all-threads, --realtime-stats,
            --native and --no-gc (stored as ``gc``).
    """
    group = parser.add_argument_group("Sampling configuration")

    # (option strings, add_argument keyword arguments), in registration order.
    option_table = (
        (
            ("-i", "--interval"),
            {
                "type": int,
                "default": 100,
                "metavar": "MICROSECONDS",
                "help": "sampling interval",
            },
        ),
        (
            ("-d", "--duration"),
            {
                "type": int,
                "default": 10,
                "metavar": "SECONDS",
                "help": "Sampling duration",
            },
        ),
        (
            ("-a", "--all-threads"),
            {
                "action": "store_true",
                "help": "Sample all threads in the process instead of just the main thread",
            },
        ),
        (
            ("--realtime-stats",),
            {
                "action": "store_true",
                "help": "Print real-time sampling statistics (Hz, mean, min, max) during profiling",
            },
        ),
        (
            ("--native",),
            {
                "action": "store_true",
                "help": 'Include artificial "<native>" frames to denote calls to non-Python code',
            },
        ),
        (
            ("--no-gc",),
            {
                "action": "store_false",
                "dest": "gc",
                "help": 'Don\'t include artificial "<GC>" frames to denote active garbage collection',
            },
        ),
    )
    for option_strings, settings in option_table:
        group.add_argument(*option_strings, **settings)
|
|
|
|
|
|
def _add_mode_options(parser):
    """Register the "Mode options" argument group on ``parser``.

    Args:
        parser: The argparse parser (or sub-parser) that receives ``--mode``,
            which accepts "wall", "cpu" or "gil" and defaults to "wall".
    """
    mode_help = (
        "Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), "
        "gil (only samples when thread holds the GIL)"
    )
    group = parser.add_argument_group("Mode options")
    group.add_argument(
        "--mode",
        choices=["wall", "cpu", "gil"],
        default="wall",
        help=mode_help,
    )
|
|
|
|
|
|
def _add_format_options(parser):
    """Register the "Output options" argument group on ``parser``.

    The format flags (--pstats, --collapsed, --flamegraph, --gecko) are
    mutually exclusive and all store their name in ``format``, which defaults
    to "pstats". ``-o/--output`` stores the target path in ``outfile``.

    Args:
        parser: The argparse parser (or sub-parser) to extend.
    """
    output_group = parser.add_argument_group("Output options")
    exclusive_formats = output_group.add_mutually_exclusive_group()

    # (format name, help text); the flag is "--<name>" and stores <name>.
    format_flags = (
        ("pstats", "Generate pstats output (default)"),
        ("collapsed", "Generate collapsed stack traces for flamegraphs"),
        ("flamegraph", "Generate interactive HTML flamegraph visualization"),
        ("gecko", "Generate Gecko format for Firefox Profiler"),
    )
    for format_name, description in format_flags:
        exclusive_formats.add_argument(
            f"--{format_name}",
            action="store_const",
            const=format_name,
            dest="format",
            help=description,
        )
    parser.set_defaults(format="pstats")

    outfile_help = (
        "Save output to a file (default: stdout for pstats, "
        "auto-generated filename for other formats)"
    )
    output_group.add_argument(
        "-o",
        "--output",
        dest="outfile",
        help=outfile_help,
    )
|
|
|
|
|
|
def _add_pstats_options(parser):
    """Register the "pstats format options" argument group on ``parser``.

    ``--sort`` and ``--limit`` default to None so that callers can tell
    whether the user passed them explicitly.

    Args:
        parser: The argparse parser (or sub-parser) to extend.
    """
    sort_choices = [
        "nsamples",
        "tottime",
        "cumtime",
        "sample-pct",
        "cumul-pct",
        "nsamples-cumul",
        "name",
    ]
    group = parser.add_argument_group("pstats format options")
    group.add_argument(
        "--sort",
        choices=sort_choices,
        default=None,
        help="Sort order for pstats output (default: nsamples)",
    )
    group.add_argument(
        "-l",
        "--limit",
        type=int,
        default=None,
        help="Limit the number of rows in the output (default: 15)",
    )
    group.add_argument(
        "--no-summary",
        action="store_true",
        help="Disable the summary section in the pstats output",
    )
|
|
|
|
|
|
def _sort_to_mode(sort_choice):
    """Map a ``--sort`` choice string to its sort-mode value.

    Args:
        sort_choice: The sort name chosen on the command line.

    Returns:
        The SORT_MODE_* constant for the choice, -1 for "name", or
        SORT_MODE_NSAMPLES when the choice is not recognised.
    """
    modes_by_choice = {
        "nsamples": SORT_MODE_NSAMPLES,
        "tottime": SORT_MODE_TOTTIME,
        "cumtime": SORT_MODE_CUMTIME,
        "sample-pct": SORT_MODE_SAMPLE_PCT,
        "cumul-pct": SORT_MODE_CUMUL_PCT,
        "nsamples-cumul": SORT_MODE_NSAMPLES_CUMUL,
        # NOTE(review): "name" uses a literal -1 rather than a SORT_MODE_*
        # constant; presumably the collector treats it specially - confirm.
        "name": -1,
    }
    try:
        return modes_by_choice[sort_choice]
    except KeyError:
        # Unknown choices fall back to sorting by sample count.
        return SORT_MODE_NSAMPLES
|
|
|
|
|
|
def _create_collector(format_type, interval, skip_idle):
    """Instantiate the collector class registered for an output format.

    Args:
        format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko')
        interval: Sampling interval in microseconds
        skip_idle: Whether to skip idle samples; always forced to False for
            the 'gecko' format

    Returns:
        A collector instance of the appropriate type

    Raises:
        ValueError: If ``format_type`` has no entry in COLLECTOR_MAP.
    """
    if COLLECTOR_MAP.get(format_type) is None:
        raise ValueError(f"Unknown format: {format_type}")
    factory = COLLECTOR_MAP[format_type]

    # Gecko format never skips idle (it needs both GIL and CPU data)
    effective_skip_idle = False if format_type == "gecko" else skip_idle

    return factory(interval, skip_idle=effective_skip_idle)
|
|
|
|
|
|
def _generate_output_filename(format_type, pid):
    """Build the default output filename ``<format>.<pid>.<extension>``.

    Args:
        format_type: The output format
        pid: Process ID

    Returns:
        The generated filename; the extension comes from FORMAT_EXTENSIONS
        and falls back to "txt" for formats not listed there.
    """
    suffix = FORMAT_EXTENSIONS.get(format_type, "txt")
    return "{}.{}.{}".format(format_type, pid, suffix)
|
|
|
|
|
|
def _handle_output(collector, args, pid, mode):
    """Export or print the collected profile according to the CLI arguments.

    Non-pstats formats are always exported to a file, using an auto-generated
    name when no output file was given. The pstats format is exported when an
    output file was given and printed otherwise.

    Args:
        collector: The collector instance with profiling data
        args: Parsed command-line arguments
        pid: Process ID (for generating filenames)
        mode: Profiling mode used
    """
    if args.format != "pstats":
        # Export to file
        target = args.outfile or _generate_output_filename(args.format, pid)
        collector.export(target)
        return

    if args.outfile:
        collector.export(args.outfile)
        return

    # Print to stdout with defaults applied
    sort_choice = "nsamples" if args.sort is None else args.sort
    row_limit = 15 if args.limit is None else args.limit
    sort_mode = _sort_to_mode(sort_choice)
    show_summary = not args.no_summary
    collector.print_stats(sort_mode, row_limit, show_summary, mode)
|
|
|
|
|
|
def _validate_args(args, parser):
    """Reject option combinations that do not make sense together.

    Checks, in order: live mode availability, live mode versus format and
    pstats-only options, gecko versus a non-default ``--mode``, and
    pstats-only options used with another format. Problems are reported
    through ``parser.error``.

    Args:
        args: Parsed command-line arguments
        parser: ArgumentParser instance for error reporting
    """

    def _pstats_only_flags_used():
        """List the pstats-specific flags the user passed explicitly."""
        used = []
        if args.sort is not None:
            used.append("--sort")
        if args.limit is not None:
            used.append("--limit")
        if args.no_summary:
            used.append("--no-summary")
        return used

    live_requested = getattr(args, "live", False)

    # Check if live mode is available
    if live_requested and LiveStatsCollector is None:
        parser.error(
            "Live mode requires the curses module, which is not available."
        )

    if live_requested:
        # Live mode is incompatible with format options
        if args.format != "pstats":
            parser.error(
                f"--live is incompatible with --{args.format}. Live mode uses a TUI interface."
            )

        # Live mode is also incompatible with pstats-specific options
        conflicting = _pstats_only_flags_used()
        if conflicting:
            parser.error(
                f"Options {', '.join(conflicting)} are incompatible with --live. "
                "Live mode uses a TUI interface with its own controls."
            )
        return

    # Validate gecko mode doesn't use non-wall mode
    if args.format == "gecko" and args.mode != "wall":
        parser.error(
            "--mode option is incompatible with --gecko. "
            "Gecko format automatically includes both GIL-holding and CPU status analysis."
        )

    # Validate pstats-specific options are only used with pstats format
    if args.format == "pstats":
        return
    conflicting = _pstats_only_flags_used()
    if conflicting:
        parser.error(
            f"Options {', '.join(conflicting)} are only valid with --pstats, not --{args.format}"
        )
|
|
|
|
|
|
def main():
    """Main entry point for the CLI.

    Builds a parser with the ``run`` and ``attach`` sub-commands, parses the
    process arguments, validates them and dispatches to the handler of the
    selected sub-command.
    """
    run_description = (
        "Run and profile a Python script or module\n"
        "\n"
        "Examples:\n"
        "# Run and profile a module\n"
        "python -m profiling.sampling run -m mymodule arg1 arg2\n"
        "\n"
        "# Generate flamegraph from a script\n"
        "python -m profiling.sampling run --flamegraph -o output.html script.py\n"
        "\n"
        "# Profile with custom interval and duration\n"
        "python -m profiling.sampling run -i 50 -d 30 script.py\n"
        "\n"
        "# Save collapsed stacks to file\n"
        "python -m profiling.sampling run --collapsed -o stacks.txt script.py\n"
        "\n"
        "# Live interactive mode for a script\n"
        "python -m profiling.sampling run --live script.py"
    )
    attach_description = (
        "Attach to a running process and profile it\n"
        "\n"
        "Examples:\n"
        "# Profile all threads, sort by total time\n"
        "python -m profiling.sampling attach -a --sort tottime 1234\n"
        "\n"
        "# Live interactive mode for a running process\n"
        "python -m profiling.sampling attach --live 1234"
    )
    live_help = (
        "Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)"
    )

    def _add_common_options(command_parser):
        """Add --live and the option groups shared by both sub-commands."""
        command_parser.add_argument(
            "--live",
            action="store_true",
            help=live_help,
        )
        _add_sampling_options(command_parser)
        _add_mode_options(command_parser)
        _add_format_options(command_parser)
        _add_pstats_options(command_parser)

    # Create the main parser and the sub-command container
    parser = argparse.ArgumentParser(
        description=_HELP_DESCRIPTION,
        formatter_class=CustomFormatter,
    )
    subparsers = parser.add_subparsers(
        dest="command", required=True, help="Command to run"
    )

    # === RUN COMMAND ===
    run_parser = subparsers.add_parser(
        "run",
        help="Run and profile a script or module",
        formatter_class=CustomFormatter,
        description=run_description,
    )
    run_parser.add_argument(
        "-m",
        "--module",
        action="store_true",
        help="Run target as a module (like python -m)",
    )
    run_parser.add_argument(
        "target",
        help="Script file or module name to profile",
    )
    run_parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to the script or module",
    )
    _add_common_options(run_parser)

    # === ATTACH COMMAND ===
    attach_parser = subparsers.add_parser(
        "attach",
        help="Attach to and profile a running process",
        formatter_class=CustomFormatter,
        description=attach_description,
    )
    attach_parser.add_argument(
        "pid",
        type=int,
        help="Process ID to attach to",
    )
    _add_common_options(attach_parser)

    # Parse and validate arguments
    args = parser.parse_args()
    _validate_args(args, parser)

    # Look up and execute the handler for the chosen command
    handler = {
        "run": _handle_run,
        "attach": _handle_attach,
    }.get(args.command)
    if not handler:
        parser.error(f"Unknown command: {args.command}")
        return
    handler(args)
|
|
|
|
|
|
def _handle_attach(args):
    """Handle the 'attach' command.

    Delegates to the live handler when ``--live`` was given. Otherwise it
    samples the process identified by ``args.pid`` and hands the resulting
    collector to the output handler.

    Args:
        args: Parsed command-line arguments for the ``attach`` sub-command.
    """
    if args.live:
        _handle_live_attach(args, args.pid)
        return

    # Use PROFILING_MODE_ALL for gecko format
    if args.format == "gecko":
        mode = PROFILING_MODE_ALL
    else:
        mode = _parse_mode(args.mode)

    # Idle samples are kept for the wall and "all" modes and skipped otherwise
    skip_idle = mode not in (PROFILING_MODE_WALL, PROFILING_MODE_ALL)

    # Create the appropriate collector
    collector = _create_collector(args.format, args.interval, skip_idle)

    # Sample the process
    sampling_options = {
        "duration_sec": args.duration,
        "all_threads": args.all_threads,
        "realtime_stats": args.realtime_stats,
        "mode": mode,
        "native": args.native,
        "gc": args.gc,
    }
    collector = sample(args.pid, collector, **sampling_options)

    # Handle output
    _handle_output(collector, args, args.pid, mode)
|
|
|
|
|
|
def _handle_run(args):
    """Handle the 'run' command.

    Delegates to the live handler when ``--live`` was given. Otherwise it
    launches the target script or module through the synchronization
    coordinator, samples it, writes the output, and finally makes sure the
    launched process is stopped.

    Args:
        args: Parsed command-line arguments for the ``run`` sub-command.
    """
    # Check if live mode is requested
    if args.live:
        _handle_live_run(args)
        return

    # Build the command to run
    if args.module:
        cmd = (sys.executable, "-m", args.target, *args.args)
    else:
        cmd = (sys.executable, args.target, *args.args)

    # Run with synchronization
    process = _run_with_sync(cmd, suppress_output=False)

    # Everything after the child has started runs under try/finally so that a
    # failure while preparing the collector cannot leave the child running.
    try:
        # Use PROFILING_MODE_ALL for gecko format
        mode = (
            PROFILING_MODE_ALL
            if args.format == "gecko"
            else _parse_mode(args.mode)
        )

        # Determine skip_idle based on mode
        skip_idle = (
            mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
        )

        # Create the appropriate collector
        collector = _create_collector(args.format, args.interval, skip_idle)

        # Profile the subprocess
        collector = sample(
            process.pid,
            collector,
            duration_sec=args.duration,
            all_threads=args.all_threads,
            realtime_stats=args.realtime_stats,
            mode=mode,
            native=args.native,
            gc=args.gc,
        )

        # Handle output
        _handle_output(collector, args, process.pid, mode)
    finally:
        # Clean up the subprocess: terminate first, kill if it does not exit
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=_PROCESS_KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
|
|
|
|
|
|
def _handle_live_attach(args, pid):
    """Handle live mode for an existing process.

    Args:
        args: Parsed command-line arguments.
        pid: ID of the process to sample.
    """
    mode = _parse_mode(args.mode)

    # Create live collector with default settings; idle samples are skipped
    # for every mode except wall.
    live_collector = LiveStatsCollector(
        args.interval,
        skip_idle=(mode != PROFILING_MODE_WALL),
        sort_by="tottime",  # Default initial sort
        limit=20,  # Default limit
        pid=pid,
        mode=mode,
    )

    # Sample in live mode
    sampling_options = {
        "duration_sec": args.duration,
        "all_threads": args.all_threads,
        "realtime_stats": args.realtime_stats,
        "mode": mode,
        "native": args.native,
        "gc": args.gc,
    }
    sample_live(pid, live_collector, **sampling_options)
|
|
|
|
|
|
def _handle_live_run(args):
    """Handle live mode for running a script/module.

    Launches the target with its standard streams suppressed, samples it in
    live mode, and finally makes sure the launched process is stopped.

    Args:
        args: Parsed command-line arguments for the ``run`` sub-command.
    """
    # Build the command to run
    if args.module:
        cmd = (sys.executable, "-m", args.target, *args.args)
    else:
        cmd = (sys.executable, args.target, *args.args)

    # Run with synchronization, suppressing output for live mode
    process = _run_with_sync(cmd, suppress_output=True)

    # Everything after the child has started runs under try/finally so that a
    # failure while building the collector cannot leave the child running.
    try:
        mode = _parse_mode(args.mode)

        # Determine skip_idle based on mode
        skip_idle = mode != PROFILING_MODE_WALL

        # Create live collector with default settings
        collector = LiveStatsCollector(
            args.interval,
            skip_idle=skip_idle,
            sort_by="tottime",  # Default initial sort
            limit=20,  # Default limit
            pid=process.pid,
            mode=mode,
        )

        # Profile the subprocess in live mode
        sample_live(
            process.pid,
            collector,
            duration_sec=args.duration,
            all_threads=args.all_threads,
            realtime_stats=args.realtime_stats,
            mode=mode,
            native=args.native,
            gc=args.gc,
        )
    finally:
        # Clean up the subprocess: terminate first, kill if it does not exit
        if process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=_PROCESS_KILL_TIMEOUT)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|