"""Command-line interface for the sampling profiler."""

import argparse
import os
import socket
import subprocess
import sys

from .sample import sample, sample_live
from .pstats_collector import PstatsCollector
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
from .gecko_collector import GeckoCollector
from .constants import (
    PROFILING_MODE_ALL,
    PROFILING_MODE_WALL,
    PROFILING_MODE_CPU,
    PROFILING_MODE_GIL,
    SORT_MODE_NSAMPLES,
    SORT_MODE_TOTTIME,
    SORT_MODE_CUMTIME,
    SORT_MODE_SAMPLE_PCT,
    SORT_MODE_CUMUL_PCT,
    SORT_MODE_NSAMPLES_CUMUL,
)

# Live mode is optional: if its collector cannot be imported, the CLI still
# works and _validate_args() rejects --live with an explicit error.
try:
    from .live_collector import LiveStatsCollector
except ImportError:
    LiveStatsCollector = None


class CustomFormatter(
    argparse.ArgumentDefaultsHelpFormatter,
    argparse.RawDescriptionHelpFormatter,
):
    """Custom formatter that combines default values display with raw description formatting."""

    pass


_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data.

Commands:
  run        Run and profile a script or module
  attach     Attach to and profile a running process

Examples:
  # Run and profile a script
  python -m profiling.sampling run script.py arg1 arg2

  # Attach to a running process
  python -m profiling.sampling attach 1234

  # Live interactive mode for a script
  python -m profiling.sampling run --live script.py

  # Live interactive mode for a running process
  python -m profiling.sampling attach --live 1234

Use 'python -m profiling.sampling <command> --help' for command-specific help."""


# Constants for socket synchronization
_SYNC_TIMEOUT = 5.0
_PROCESS_KILL_TIMEOUT = 2.0
_READY_MESSAGE = b"ready"
_RECV_BUFFER_SIZE = 1024

# Format configuration
FORMAT_EXTENSIONS = {
    "pstats": "pstats",
    "collapsed": "txt",
    "flamegraph": "html",
    "gecko": "json",
}

COLLECTOR_MAP = {
    "pstats": PstatsCollector,
    "collapsed": CollapsedStackCollector,
    "flamegraph": FlamegraphCollector,
    "gecko": GeckoCollector,
}


def _parse_mode(mode_string):
    """Convert a ``--mode`` string to its PROFILING_MODE_* constant.

    Args:
        mode_string: One of "wall", "cpu" or "gil".

    Returns:
        The matching PROFILING_MODE_* constant.

    Raises:
        KeyError: If ``mode_string`` is not one of the known modes.
    """
    mode_map = {
        "wall": PROFILING_MODE_WALL,
        "cpu": PROFILING_MODE_CPU,
        "gil": PROFILING_MODE_GIL,
    }
    return mode_map[mode_string]


def _terminate_process(process):
    """Stop ``process`` if it is still running.

    Sends terminate first and waits up to ``_PROCESS_KILL_TIMEOUT`` seconds;
    if the process has not exited by then it is killed and reaped.

    Args:
        process: A ``subprocess.Popen`` instance.
    """
    if process.poll() is not None:
        return  # Already exited; nothing to clean up.
    process.terminate()
    try:
        process.wait(timeout=_PROCESS_KILL_TIMEOUT)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def _run_with_sync(original_cmd, suppress_output=False):
    """Run a command with socket-based synchronization and return the process.

    The target is launched through ``profiling.sampling._sync_coordinator``,
    which is given a local TCP port and the current working directory. This
    function then waits for the ready message on that port before returning.

    Args:
        original_cmd: Command sequence whose first element is the Python
            executable; only the remaining elements are forwarded.
        suppress_output: If true, redirect the child's stdin, stdout and
            stderr to ``subprocess.DEVNULL`` (used by live mode).

    Returns:
        The started ``subprocess.Popen`` instance.

    Raises:
        RuntimeError: If the child does not signal readiness within
            ``_SYNC_TIMEOUT`` seconds or sends an unexpected message.
    """
    # Create a TCP socket for synchronization with better socket options
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock:
        # Set SO_REUSEADDR to avoid "Address already in use" errors
        sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sync_sock.bind(("127.0.0.1", 0))  # Let OS choose a free port
        sync_port = sync_sock.getsockname()[1]
        sync_sock.listen(1)
        sync_sock.settimeout(_SYNC_TIMEOUT)

        # Get current working directory to preserve it
        cwd = os.getcwd()

        # Build command using the sync coordinator
        target_args = original_cmd[1:]  # Remove python executable
        cmd = (
            sys.executable,
            "-m",
            "profiling.sampling._sync_coordinator",
            str(sync_port),
            cwd,
        ) + tuple(target_args)

        # Start the process with coordinator
        # Suppress stdout/stderr if requested (for live mode)
        popen_kwargs = {}
        if suppress_output:
            popen_kwargs["stdin"] = subprocess.DEVNULL
            popen_kwargs["stdout"] = subprocess.DEVNULL
            popen_kwargs["stderr"] = subprocess.DEVNULL

        process = subprocess.Popen(cmd, **popen_kwargs)

        try:
            # Wait for ready signal with timeout
            with sync_sock.accept()[0] as conn:
                # The accepted socket is in blocking mode by default, so
                # bound the recv explicitly; otherwise a child that connects
                # but never writes would hang the profiler forever.
                conn.settimeout(_SYNC_TIMEOUT)
                ready_signal = conn.recv(_RECV_BUFFER_SIZE)

            if ready_signal != _READY_MESSAGE:
                raise RuntimeError(
                    f"Invalid ready signal received: {ready_signal!r}"
                )
        except socket.timeout as err:
            # If we timeout, kill the process and raise an error
            _terminate_process(process)
            raise RuntimeError(
                "Process failed to signal readiness within timeout"
            ) from err
        except BaseException:
            # Any other failure (bad handshake, OS error, Ctrl-C) must not
            # leave the child running without an owner.
            _terminate_process(process)
            raise

        return process


def _add_sampling_options(parser):
    """Add sampling configuration options to a parser."""
    sampling_group = parser.add_argument_group("Sampling configuration")
    sampling_group.add_argument(
        "-i",
        "--interval",
        type=int,
        default=100,
        metavar="MICROSECONDS",
        help="sampling interval",
    )
    sampling_group.add_argument(
        "-d",
        "--duration",
        type=int,
        default=10,
        metavar="SECONDS",
        help="Sampling duration",
    )
    sampling_group.add_argument(
        "-a",
        "--all-threads",
        action="store_true",
        help="Sample all threads in the process instead of just the main thread",
    )
    sampling_group.add_argument(
        "--realtime-stats",
        action="store_true",
        help="Print real-time sampling statistics (Hz, mean, min, max) during profiling",
    )
    sampling_group.add_argument(
        "--native",
        action="store_true",
        help='Include artificial "<native>" frames to denote calls to non-Python code',
    )
    sampling_group.add_argument(
        "--no-gc",
        action="store_false",
        dest="gc",
        help='Don\'t include artificial "<GC>" frames to denote active garbage collection',
    )


def _add_mode_options(parser):
    """Add mode options to a parser."""
    mode_group = parser.add_argument_group("Mode options")
    mode_group.add_argument(
        "--mode",
        choices=["wall", "cpu", "gil"],
        default="wall",
        help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), "
        "gil (only samples when thread holds the GIL)",
    )


def _add_format_options(parser):
    """Add output format options to a parser.

    The format flags are mutually exclusive and all store into ``format``,
    which defaults to "pstats".
    """
    output_group = parser.add_argument_group("Output options")
    format_group = output_group.add_mutually_exclusive_group()
    format_group.add_argument(
        "--pstats",
        action="store_const",
        const="pstats",
        dest="format",
        help="Generate pstats output (default)",
    )
    format_group.add_argument(
        "--collapsed",
        action="store_const",
        const="collapsed",
        dest="format",
        help="Generate collapsed stack traces for flamegraphs",
    )
    format_group.add_argument(
        "--flamegraph",
        action="store_const",
        const="flamegraph",
        dest="format",
        help="Generate interactive HTML flamegraph visualization",
    )
    format_group.add_argument(
        "--gecko",
        action="store_const",
        const="gecko",
        dest="format",
        help="Generate Gecko format for Firefox Profiler",
    )
    parser.set_defaults(format="pstats")

    output_group.add_argument(
        "-o",
        "--output",
        dest="outfile",
        help="Save output to a file (default: stdout for pstats, "
        "auto-generated filename for other formats)",
    )


def _add_pstats_options(parser):
    """Add pstats-specific display options to a parser.

    ``--sort`` and ``--limit`` default to None so that _validate_args() can
    tell "not given" apart from an explicit value; the effective defaults
    are applied in _handle_output().
    """
    pstats_group = parser.add_argument_group("pstats format options")
    pstats_group.add_argument(
        "--sort",
        choices=[
            "nsamples",
            "tottime",
            "cumtime",
            "sample-pct",
            "cumul-pct",
            "nsamples-cumul",
            "name",
        ],
        default=None,
        help="Sort order for pstats output (default: nsamples)",
    )
    pstats_group.add_argument(
        "-l",
        "--limit",
        type=int,
        default=None,
        help="Limit the number of rows in the output (default: 15)",
    )
    pstats_group.add_argument(
        "--no-summary",
        action="store_true",
        help="Disable the summary section in the pstats output",
    )


def _sort_to_mode(sort_choice):
    """Convert sort choice string to SORT_MODE constant.

    Unknown choices fall back to ``SORT_MODE_NSAMPLES``.
    """
    sort_map = {
        "nsamples": SORT_MODE_NSAMPLES,
        "tottime": SORT_MODE_TOTTIME,
        "cumtime": SORT_MODE_CUMTIME,
        "sample-pct": SORT_MODE_SAMPLE_PCT,
        "cumul-pct": SORT_MODE_CUMUL_PCT,
        "nsamples-cumul": SORT_MODE_NSAMPLES_CUMUL,
        # NOTE(review): no SORT_MODE_* constant is imported for "name"; -1 is
        # passed through as-is -- confirm print_stats() treats it as name sort.
        "name": -1,
    }
    return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)


def _create_collector(format_type, interval, skip_idle):
    """Create the appropriate collector based on format type.

    Args:
        format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko')
        interval: Sampling interval in microseconds
        skip_idle: Whether to skip idle samples

    Returns:
        A collector instance of the appropriate type

    Raises:
        ValueError: If ``format_type`` has no entry in ``COLLECTOR_MAP``.
    """
    collector_class = COLLECTOR_MAP.get(format_type)
    if collector_class is None:
        raise ValueError(f"Unknown format: {format_type}")

    # Gecko format never skips idle (it needs both GIL and CPU data)
    if format_type == "gecko":
        skip_idle = False

    return collector_class(interval, skip_idle=skip_idle)


def _generate_output_filename(format_type, pid):
    """Generate output filename based on format and PID.

    Args:
        format_type: The output format
        pid: Process ID

    Returns:
        Generated filename of the form ``<format>.<pid>.<extension>``; the
        extension falls back to "txt" for formats not in FORMAT_EXTENSIONS.
    """
    extension = FORMAT_EXTENSIONS.get(format_type, "txt")
    return f"{format_type}.{pid}.{extension}"


def _handle_output(collector, args, pid, mode):
    """Handle output for the collector based on format and arguments.

    Args:
        collector: The collector instance with profiling data
        args: Parsed command-line arguments
        pid: Process ID (for generating filenames)
        mode: Profiling mode used
    """
    if args.format == "pstats":
        if args.outfile:
            collector.export(args.outfile)
        else:
            # Print to stdout with defaults applied
            sort_choice = args.sort if args.sort is not None else "nsamples"
            limit = args.limit if args.limit is not None else 15
            sort_mode = _sort_to_mode(sort_choice)
            collector.print_stats(
                sort_mode, limit, not args.no_summary, mode
            )
    else:
        # Export to file
        filename = args.outfile or _generate_output_filename(args.format, pid)
        collector.export(filename)


def _pstats_only_options_used(args):
    """Return the pstats-specific flags that were explicitly given."""
    used = []
    if args.sort is not None:
        used.append("--sort")
    if args.limit is not None:
        used.append("--limit")
    if args.no_summary:
        used.append("--no-summary")
    return used


def _validate_args(args, parser):
    """Validate format-specific options and live mode requirements.

    Every violation is reported through ``parser.error``.

    Args:
        args: Parsed command-line arguments
        parser: ArgumentParser instance for error reporting
    """
    live_requested = getattr(args, "live", False)

    # Check if live mode is available
    if live_requested and LiveStatsCollector is None:
        parser.error(
            "Live mode requires the curses module, which is not available."
        )

    # Live mode is incompatible with format options
    if live_requested:
        if args.format != "pstats":
            format_flag = f"--{args.format}"
            parser.error(
                f"--live is incompatible with {format_flag}. Live mode uses a TUI interface."
            )

        # Live mode is also incompatible with pstats-specific options
        issues = _pstats_only_options_used(args)
        if issues:
            parser.error(
                f"Options {', '.join(issues)} are incompatible with --live. "
                "Live mode uses a TUI interface with its own controls."
            )
        return

    # Validate gecko mode doesn't use non-wall mode
    if args.format == "gecko" and args.mode != "wall":
        parser.error(
            "--mode option is incompatible with --gecko. "
            "Gecko format automatically includes both GIL-holding and CPU status analysis."
        )

    # Validate pstats-specific options are only used with pstats format
    if args.format != "pstats":
        issues = _pstats_only_options_used(args)
        if issues:
            format_flag = f"--{args.format}"
            parser.error(
                f"Options {', '.join(issues)} are only valid with --pstats, not {format_flag}"
            )


def main():
    """Main entry point for the CLI.

    Builds the ``run`` and ``attach`` subcommands, parses ``sys.argv``,
    validates the option combination and dispatches to the command handler.
    """
    # Create the main parser
    parser = argparse.ArgumentParser(
        description=_HELP_DESCRIPTION,
        formatter_class=CustomFormatter,
    )

    # Create subparsers for commands
    subparsers = parser.add_subparsers(
        dest="command", required=True, help="Command to run"
    )

    # === RUN COMMAND ===
    run_parser = subparsers.add_parser(
        "run",
        help="Run and profile a script or module",
        formatter_class=CustomFormatter,
        description="""Run and profile a Python script or module

Examples:
  # Run and profile a module
  python -m profiling.sampling run -m mymodule arg1 arg2

  # Generate flamegraph from a script
  python -m profiling.sampling run --flamegraph -o output.html script.py

  # Profile with custom interval and duration
  python -m profiling.sampling run -i 50 -d 30 script.py

  # Save collapsed stacks to file
  python -m profiling.sampling run --collapsed -o stacks.txt script.py

  # Live interactive mode for a script
  python -m profiling.sampling run --live script.py""",
    )
    run_parser.add_argument(
        "-m",
        "--module",
        action="store_true",
        help="Run target as a module (like python -m)",
    )
    run_parser.add_argument(
        "target",
        help="Script file or module name to profile",
    )
    run_parser.add_argument(
        "args",
        nargs=argparse.REMAINDER,
        help="Arguments to pass to the script or module",
    )
    run_parser.add_argument(
        "--live",
        action="store_true",
        help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)",
    )
    _add_sampling_options(run_parser)
    _add_mode_options(run_parser)
    _add_format_options(run_parser)
    _add_pstats_options(run_parser)

    # === ATTACH COMMAND ===
    attach_parser = subparsers.add_parser(
        "attach",
        help="Attach to and profile a running process",
        formatter_class=CustomFormatter,
        description="""Attach to a running process and profile it

Examples:
  # Profile all threads, sort by total time
  python -m profiling.sampling attach -a --sort tottime 1234

  # Live interactive mode for a running process
  python -m profiling.sampling attach --live 1234""",
    )
    attach_parser.add_argument(
        "pid",
        type=int,
        help="Process ID to attach to",
    )
    attach_parser.add_argument(
        "--live",
        action="store_true",
        help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)",
    )
    _add_sampling_options(attach_parser)
    _add_mode_options(attach_parser)
    _add_format_options(attach_parser)
    _add_pstats_options(attach_parser)

    # Parse arguments
    args = parser.parse_args()

    # Validate arguments
    _validate_args(args, parser)

    # Command dispatch table
    command_handlers = {
        "run": _handle_run,
        "attach": _handle_attach,
    }

    # Execute the appropriate command
    handler = command_handlers.get(args.command)
    if handler:
        handler(args)
    else:
        parser.error(f"Unknown command: {args.command}")


def _build_target_command(args):
    """Return the command tuple that runs the ``run`` target.

    Uses ``python -m <target>`` when ``--module`` was given, otherwise
    ``python <target>``; the remaining arguments are appended unchanged.
    """
    if args.module:
        return (sys.executable, "-m", args.target, *args.args)
    return (sys.executable, args.target, *args.args)


def _resolve_mode(args):
    """Return ``(mode, skip_idle)`` for a non-live profiling session.

    Gecko output always uses ``PROFILING_MODE_ALL``. Idle samples are
    skipped for every mode except wall and all.
    """
    # Use PROFILING_MODE_ALL for gecko format
    if args.format == "gecko":
        mode = PROFILING_MODE_ALL
    else:
        mode = _parse_mode(args.mode)

    skip_idle = mode not in (PROFILING_MODE_WALL, PROFILING_MODE_ALL)
    return mode, skip_idle


def _profile_process(args, pid):
    """Sample process ``pid`` with the configured collector and emit output.

    Args:
        args: Parsed command-line arguments.
        pid: ID of the process to sample.
    """
    mode, skip_idle = _resolve_mode(args)

    # Create the appropriate collector
    collector = _create_collector(args.format, args.interval, skip_idle)

    # Sample the process
    collector = sample(
        pid,
        collector,
        duration_sec=args.duration,
        all_threads=args.all_threads,
        realtime_stats=args.realtime_stats,
        mode=mode,
        native=args.native,
        gc=args.gc,
    )

    # Handle output
    _handle_output(collector, args, pid, mode)


def _handle_attach(args):
    """Handle the 'attach' command."""
    # Check if live mode is requested
    if args.live:
        _handle_live_attach(args, args.pid)
        return

    _profile_process(args, args.pid)


def _handle_run(args):
    """Handle the 'run' command.

    Starts the target through _run_with_sync(), profiles it, and always
    stops the child afterwards, including when profiling setup fails.
    """
    # Check if live mode is requested
    if args.live:
        _handle_live_run(args)
        return

    # Run with synchronization
    process = _run_with_sync(_build_target_command(args), suppress_output=False)

    # Collector creation happens inside the try so that a failure there
    # cannot leave the freshly spawned child running.
    try:
        _profile_process(args, process.pid)
    finally:
        # Clean up the subprocess
        _terminate_process(process)


def _handle_live_attach(args, pid):
    """Handle live mode for an existing process.

    Args:
        args: Parsed command-line arguments.
        pid: ID of the process to sample.
    """
    mode = _parse_mode(args.mode)

    # Determine skip_idle based on mode
    skip_idle = mode != PROFILING_MODE_WALL

    # Create live collector with default settings
    collector = LiveStatsCollector(
        args.interval,
        skip_idle=skip_idle,
        sort_by="tottime",  # Default initial sort
        limit=20,  # Default limit
        pid=pid,
        mode=mode,
    )

    # Sample in live mode
    sample_live(
        pid,
        collector,
        duration_sec=args.duration,
        all_threads=args.all_threads,
        realtime_stats=args.realtime_stats,
        mode=mode,
        native=args.native,
        gc=args.gc,
    )


def _handle_live_run(args):
    """Handle live mode for running a script/module.

    The child's output is suppressed so it does not interfere with the TUI,
    and the child is always stopped when live sampling ends or fails.
    """
    # Run with synchronization, suppressing output for live mode
    process = _run_with_sync(_build_target_command(args), suppress_output=True)

    # Profile the subprocess in live mode; collector construction is inside
    # the try so a failure there still stops the child.
    try:
        _handle_live_attach(args, process.pid)
    finally:
        # Clean up the subprocess
        _terminate_process(process)


if __name__ == "__main__":
    main()