gh-138122: Add exception profiling mode to the sampling profiler (#142561)

2025-12-31 12:33:28 +00:00 · 2025-12-11 20:46:34 +00:00 · 2025-12-11 20:46:34 +00:00 · 6a0135a392
commit 6a0135a392
parent e38967ed60
19 changed files with 802 additions and 17 deletions
--- a/Doc/library/profiling.sampling.rst
+++ b/Doc/library/profiling.sampling.rst
@ -470,9 +470,10 @@ which you can use to judge whether the data is sufficient for your analysis.
 Profiling modes
 ===============

-The sampling profiler supports three modes that control which samples are
+The sampling profiler supports four modes that control which samples are
 recorded. The mode determines what the profile measures: total elapsed time,
-CPU execution time, or time spent holding the global interpreter lock.
+CPU execution time, time spent holding the global interpreter lock, or
+exception handling.


 Wall-clock mode
@ -553,6 +554,67 @@ single-threaded programs to distinguish Python execution time from time spent
 in C extensions or I/O.


+Exception mode
+--------------
+
+Exception mode (``--mode=exception``) records samples only when a thread has
+an active exception::
+
+   python -m profiling.sampling run --mode=exception script.py
+
+Samples are recorded in two situations: when an exception is being propagated
+up the call stack (after ``raise`` but before being caught), or when code is
+executing inside an ``except`` block where exception information is still
+present in the thread state.
+
+The following example illustrates which code regions are captured:
+
+.. code-block:: python
+
+   def example():
+       try:
+           raise ValueError("error")    # Captured: exception being raised
+       except ValueError:
+           process_error()              # Captured: inside except block
+       finally:
+           cleanup()                    # NOT captured: exception already handled
+
+   def example_propagating():
+       try:
+           try:
+               raise ValueError("error")
+           finally:
+               cleanup()                # Captured: exception propagating through
+       except ValueError:
+           pass
+
+   def example_no_exception():
+       try:
+           do_work()
+       finally:
+           cleanup()                    # NOT captured: no exception involved
+
+Note that ``finally`` blocks are only captured when an exception is actively
+propagating through them. Once an ``except`` block finishes executing, Python
+clears the exception information before running any subsequent ``finally``
+block. Similarly, ``finally`` blocks that run during normal execution (when no
+exception was raised) are not captured because no exception state is present.
+
+This mode is useful for understanding where your program spends time handling
+errors. Exception handling can be a significant source of overhead in code
+that uses exceptions for flow control (such as ``StopIteration`` in iterators)
+or in applications that process many error conditions (such as network servers
+handling connection failures).
+
+Exception mode helps answer questions like "how much time is spent handling
+exceptions?" and "which exception handlers are the most expensive?" It can
+reveal hidden performance costs in code that catches and processes many
+exceptions, even when those exceptions are handled gracefully. For example,
+if a parsing library uses exceptions internally to signal format errors, this
+mode will capture time spent in those handlers even if the calling code never
+sees the exceptions.
+
+
 Output formats
 ==============

@ -1006,8 +1068,9 @@ Mode options

 .. option:: --mode <mode>

-   Sampling mode: ``wall`` (default), ``cpu``, or ``gil``.
-   The ``cpu`` and ``gil`` modes are incompatible with ``--async-aware``.
+   Sampling mode: ``wall`` (default), ``cpu``, ``gil``, or ``exception``.
+   The ``cpu``, ``gil``, and ``exception`` modes are incompatible with
+   ``--async-aware``.

 .. option:: --async-mode <mode>

--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@ -146,6 +146,8 @@ Key features include:
    and blocking. Use this to identify CPU-bound bottlenecks and optimize computational work.
  * **GIL-holding time** (``--mode gil``): Measures time spent holding Python's Global Interpreter
    Lock. Use this to identify which threads dominate GIL usage in multi-threaded applications.
+  * **Exception handling time** (``--mode exception``): Captures samples only from threads with
+    an active exception. Use this to analyze exception handling overhead.

 * **Thread-aware profiling**: Option to profile all threads (``-a``) or just the main thread,
  essential for understanding multi-threaded application behavior.
@ -175,6 +177,10 @@ Key features include:
  (``--async-aware``). See which coroutines are consuming time, with options to show only
  running tasks or all tasks including those waiting.

+* **Opcode-level profiling**: Gather bytecode opcode information for instruction-level
+  profiling (``--opcodes``). Shows which bytecode instructions are executing, including
+  specializations from the adaptive interpreter.
+
 See :mod:`profiling.sampling` for the complete documentation, including all
 available output formats, profiling modes, and configuration options.

--- a/Include/internal/pycore_debug_offsets.h
+++ b/Include/internal/pycore_debug_offsets.h
@ -110,8 +110,15 @@ typedef struct _Py_DebugOffsets {
        uint64_t status;
        uint64_t holds_gil;
        uint64_t gil_requested;
+        uint64_t current_exception;
+        uint64_t exc_state;
    } thread_state;

+    // Exception stack item offset
+    struct {
+        uint64_t exc_value;
+    } err_stackitem;
+
    // InterpreterFrame offset;
    struct _interpreter_frame {
        uint64_t size;
@ -282,6 +289,11 @@ typedef struct _Py_DebugOffsets {
        .status = offsetof(PyThreadState, _status), \
        .holds_gil = offsetof(PyThreadState, holds_gil), \
        .gil_requested = offsetof(PyThreadState, gil_requested), \
+        .current_exception = offsetof(PyThreadState, current_exception), \
+        .exc_state = offsetof(PyThreadState, exc_state), \
+    }, \
+    .err_stackitem = { \
+        .exc_value = offsetof(_PyErr_StackItem, exc_value), \
    }, \
    .interpreter_frame = { \
        .size = sizeof(_PyInterpreterFrame), \
--- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js
+++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js
@ -717,6 +717,10 @@ function populateThreadStats(data, selectedThreadId = null) {

  const gcPctElem = document.getElementById('gc-pct');
  if (gcPctElem) gcPctElem.textContent = `${(threadStats.gc_pct || 0).toFixed(1)}%`;
+
+  // Exception stats
+  const excPctElem = document.getElementById('exc-pct');
+  if (excPctElem) excPctElem.textContent = `${(threadStats.has_exception_pct || 0).toFixed(1)}%`;
 }

 // ============================================================================
--- a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html
+++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html
@ -161,6 +161,10 @@ <h3 class="section-title">Runtime Stats</h3>
                    <div class="stat-tile-value" id="gc-pct">--</div>
                    <div class="stat-tile-label">GC</div>
                  </div>
+                  <div class="stat-tile stat-tile--red" id="exc-stat">
+                    <div class="stat-tile-value" id="exc-pct">--</div>
+                    <div class="stat-tile-label">Exception</div>
+                  </div>
                </div>
              </div>
            </section>
--- a/Lib/profiling/sampling/cli.py
+++ b/Lib/profiling/sampling/cli.py
@ -16,6 +16,7 @@
    PROFILING_MODE_WALL,
    PROFILING_MODE_CPU,
    PROFILING_MODE_GIL,
+    PROFILING_MODE_EXCEPTION,
    SORT_MODE_NSAMPLES,
    SORT_MODE_TOTTIME,
    SORT_MODE_CUMTIME,
@ -90,6 +91,7 @@ def _parse_mode(mode_string):
        "wall": PROFILING_MODE_WALL,
        "cpu": PROFILING_MODE_CPU,
        "gil": PROFILING_MODE_GIL,
+        "exception": PROFILING_MODE_EXCEPTION,
    }
    return mode_map[mode_string]

@ -213,10 +215,12 @@ def _add_mode_options(parser):
    mode_group = parser.add_argument_group("Mode options")
    mode_group.add_argument(
        "--mode",
-        choices=["wall", "cpu", "gil"],
+        choices=["wall", "cpu", "gil", "exception"],
        default="wall",
        help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), "
-        "gil (only samples when thread holds the GIL). Incompatible with --async-aware",
+        "gil (only samples when thread holds the GIL), "
+        "exception (only samples when thread has an active exception). "
+        "Incompatible with --async-aware",
    )
    mode_group.add_argument(
        "--async-mode",
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@ -5,6 +5,7 @@
    THREAD_STATUS_ON_CPU,
    THREAD_STATUS_GIL_REQUESTED,
    THREAD_STATUS_UNKNOWN,
+    THREAD_STATUS_HAS_EXCEPTION,
 )

 try:
@ -170,7 +171,7 @@ def _collect_thread_status_stats(self, stack_frames):

        Returns:
            tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats)
-                - aggregate_status_counts: dict with has_gil, on_cpu, etc.
+                - aggregate_status_counts: dict with has_gil, on_cpu, has_exception, etc.
                - has_gc_frame: bool indicating if any thread has GC frames
                - per_thread_stats: dict mapping thread_id to per-thread counts
        """
@ -179,6 +180,7 @@ def _collect_thread_status_stats(self, stack_frames):
            "on_cpu": 0,
            "gil_requested": 0,
            "unknown": 0,
+            "has_exception": 0,
            "total": 0,
        }
        has_gc_frame = False
@ -200,6 +202,8 @@ def _collect_thread_status_stats(self, stack_frames):
                    status_counts["gil_requested"] += 1
                if status_flags & THREAD_STATUS_UNKNOWN:
                    status_counts["unknown"] += 1
+                if status_flags & THREAD_STATUS_HAS_EXCEPTION:
+                    status_counts["has_exception"] += 1

                # Track per-thread statistics
                thread_id = getattr(thread_info, "thread_id", None)
@ -210,6 +214,7 @@ def _collect_thread_status_stats(self, stack_frames):
                            "on_cpu": 0,
                            "gil_requested": 0,
                            "unknown": 0,
+                            "has_exception": 0,
                            "total": 0,
                            "gc_samples": 0,
                        }
@ -225,6 +230,8 @@ def _collect_thread_status_stats(self, stack_frames):
                        thread_stats["gil_requested"] += 1
                    if status_flags & THREAD_STATUS_UNKNOWN:
                        thread_stats["unknown"] += 1
+                    if status_flags & THREAD_STATUS_HAS_EXCEPTION:
+                        thread_stats["has_exception"] += 1

                    # Check for GC frames in this thread
                    frames = getattr(thread_info, "frame_info", None)
--- a/Lib/profiling/sampling/constants.py
+++ b/Lib/profiling/sampling/constants.py
@ -5,6 +5,7 @@
 PROFILING_MODE_CPU = 1
 PROFILING_MODE_GIL = 2
 PROFILING_MODE_ALL = 3  # Combines GIL + CPU checks
+PROFILING_MODE_EXCEPTION = 4  # Only samples when thread has an active exception

 # Sort mode constants
 SORT_MODE_NSAMPLES = 0
@ -25,6 +26,7 @@
        THREAD_STATUS_ON_CPU,
        THREAD_STATUS_UNKNOWN,
        THREAD_STATUS_GIL_REQUESTED,
+        THREAD_STATUS_HAS_EXCEPTION,
    )
 except ImportError:
    # Fallback for tests or when module is not available
@ -32,3 +34,4 @@
    THREAD_STATUS_ON_CPU = (1 << 1)
    THREAD_STATUS_UNKNOWN = (1 << 2)
    THREAD_STATUS_GIL_REQUESTED = (1 << 3)
+    THREAD_STATUS_HAS_EXCEPTION = (1 << 4)
--- a/Lib/profiling/sampling/gecko_collector.py
+++ b/Lib/profiling/sampling/gecko_collector.py
@ -9,13 +9,14 @@
 from .collector import Collector
 from .opcode_utils import get_opcode_info, format_opcode
 try:
-    from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
+    from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED, THREAD_STATUS_HAS_EXCEPTION
 except ImportError:
    # Fallback if module not available (shouldn't happen in normal use)
    THREAD_STATUS_HAS_GIL = (1 << 0)
    THREAD_STATUS_ON_CPU = (1 << 1)
    THREAD_STATUS_UNKNOWN = (1 << 2)
    THREAD_STATUS_GIL_REQUESTED = (1 << 3)
+    THREAD_STATUS_HAS_EXCEPTION = (1 << 4)


 # Categories matching Firefox Profiler expectations
@ -28,6 +29,7 @@
    {"name": "CPU", "color": "purple", "subcategories": ["Other"]},
    {"name": "Code Type", "color": "red", "subcategories": ["Other"]},
    {"name": "Opcodes", "color": "magenta", "subcategories": ["Other"]},
+    {"name": "Exception", "color": "lightblue", "subcategories": ["Other"]},
 ]

 # Category indices
@ -39,6 +41,7 @@
 CATEGORY_CPU = 5
 CATEGORY_CODE_TYPE = 6
 CATEGORY_OPCODES = 7
+CATEGORY_EXCEPTION = 8

 # Subcategory indices
 DEFAULT_SUBCATEGORY = 0
@ -88,6 +91,8 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
        self.python_code_start = {}       # Thread running Python code (has GIL)
        self.native_code_start = {}       # Thread running native code (on CPU without GIL)
        self.gil_wait_start = {}          # Thread waiting for GIL
+        self.exception_start = {}         # Thread has an exception set
+        self.no_exception_start = {}      # Thread has no exception set

        # GC event tracking: track GC start time per thread
        self.gc_start_per_thread = {}  # tid -> start_time
@ -204,6 +209,13 @@ def collect(self, stack_frames):
                    self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
                                   current_time, CATEGORY_GIL)

+                # Track exception state (Has Exception / No Exception)
+                has_exception = bool(status_flags & THREAD_STATUS_HAS_EXCEPTION)
+                self._track_state_transition(
+                    tid, has_exception, self.exception_start, self.no_exception_start,
+                    "Has Exception", "No Exception", CATEGORY_EXCEPTION, current_time
+                )
+
                # Track GC events by detecting <GC> frames in the stack trace
                # This leverages the improved GC frame tracking from commit 336366fd7ca
                # which precisely identifies the thread that initiated GC collection
@ -622,6 +634,8 @@ def _finalize_markers(self):
            (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
            (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
            (self.gc_start_per_thread, "GC Collecting", CATEGORY_GC),
+            (self.exception_start, "Has Exception", CATEGORY_EXCEPTION),
+            (self.no_exception_start, "No Exception", CATEGORY_EXCEPTION),
        ]

        for state_dict, marker_name, category in marker_states:
--- a/Lib/profiling/sampling/live_collector/collector.py
+++ b/Lib/profiling/sampling/live_collector/collector.py
@ -17,6 +17,7 @@
    THREAD_STATUS_ON_CPU,
    THREAD_STATUS_UNKNOWN,
    THREAD_STATUS_GIL_REQUESTED,
+    THREAD_STATUS_HAS_EXCEPTION,
    PROFILING_MODE_CPU,
    PROFILING_MODE_GIL,
    PROFILING_MODE_WALL,
@ -61,6 +62,7 @@ class ThreadData:
    on_cpu: int = 0
    gil_requested: int = 0
    unknown: int = 0
+    has_exception: int = 0
    total: int = 0  # Total status samples for this thread

    # Sample counts
@ -82,6 +84,8 @@ def increment_status_flag(self, status_flags):
            self.gil_requested += 1
        if status_flags & THREAD_STATUS_UNKNOWN:
            self.unknown += 1
+        if status_flags & THREAD_STATUS_HAS_EXCEPTION:
+            self.has_exception += 1
        self.total += 1

    def as_status_dict(self):
@ -91,6 +95,7 @@ def as_status_dict(self):
            "on_cpu": self.on_cpu,
            "gil_requested": self.gil_requested,
            "unknown": self.unknown,
+            "has_exception": self.has_exception,
            "total": self.total,
        }

@ -160,6 +165,7 @@ def __init__(
            "on_cpu": 0,
            "gil_requested": 0,
            "unknown": 0,
+            "has_exception": 0,
            "total": 0,  # Total thread count across all samples
        }
        self.gc_frame_samples = 0  # Track samples with GC frames
@ -359,6 +365,7 @@ def collect(self, stack_frames):
                thread_data.on_cpu += stats.get("on_cpu", 0)
                thread_data.gil_requested += stats.get("gil_requested", 0)
                thread_data.unknown += stats.get("unknown", 0)
+                thread_data.has_exception += stats.get("has_exception", 0)
                thread_data.total += stats.get("total", 0)
                if stats.get("gc_samples", 0):
                    thread_data.gc_frame_samples += stats["gc_samples"]
@ -723,6 +730,7 @@ def reset_stats(self):
            "on_cpu": 0,
            "gil_requested": 0,
            "unknown": 0,
+            "has_exception": 0,
            "total": 0,
        }
        self.gc_frame_samples = 0
--- a/Lib/profiling/sampling/live_collector/widgets.py
+++ b/Lib/profiling/sampling/live_collector/widgets.py
@ -389,6 +389,7 @@ def draw_thread_status(self, line, width):
        pct_on_gil = (status_counts["has_gil"] / total_threads) * 100
        pct_off_gil = 100.0 - pct_on_gil
        pct_gil_requested = (status_counts["gil_requested"] / total_threads) * 100
+        pct_exception = (status_counts.get("has_exception", 0) / total_threads) * 100

        # Get GC percentage based on view mode
        if thread_data:
@ -427,6 +428,17 @@ def draw_thread_status(self, line, width):
                add_separator=True,
            )

+        # Show exception stats
+        if col < width - 15:
+            col = self._add_percentage_stat(
+                line,
+                col,
+                pct_exception,
+                "exc",
+                self.colors["red"],
+                add_separator=(col > 11),
+            )
+
        # Always show GC stats
        if col < width - 15:
            col = self._add_percentage_stat(
--- a/Lib/profiling/sampling/sample.py
+++ b/Lib/profiling/sampling/sample.py
@ -17,6 +17,7 @@
    PROFILING_MODE_CPU,
    PROFILING_MODE_GIL,
    PROFILING_MODE_ALL,
+    PROFILING_MODE_EXCEPTION,
 )
 try:
    from .live_collector import LiveStatsCollector
@ -300,7 +301,8 @@ def sample(
        all_threads: Whether to sample all threads
        realtime_stats: Whether to print real-time sampling statistics
        mode: Profiling mode - WALL (all samples), CPU (only when on CPU),
-              GIL (only when holding GIL), ALL (includes GIL and CPU status)
+              GIL (only when holding GIL), ALL (includes GIL and CPU status),
+              EXCEPTION (only when thread has an active exception)
        native: Whether to include native frames
        gc: Whether to include GC frames
        opcodes: Whether to include opcode information
@ -360,7 +362,8 @@ def sample_live(
        all_threads: Whether to sample all threads
        realtime_stats: Whether to print real-time sampling statistics
        mode: Profiling mode - WALL (all samples), CPU (only when on CPU),
-              GIL (only when holding GIL), ALL (includes GIL and CPU status)
+              GIL (only when holding GIL), ALL (includes GIL and CPU status),
+              EXCEPTION (only when thread has an active exception)
        native: Whether to include native frames
        gc: Whether to include GC frames
        opcodes: Whether to include opcode information
--- a/Lib/profiling/sampling/stack_collector.py
+++ b/Lib/profiling/sampling/stack_collector.py
@ -87,12 +87,13 @@ def __init__(self, *args, **kwargs):
            "on_cpu": 0,
            "gil_requested": 0,
            "unknown": 0,
+            "has_exception": 0,
            "total": 0,
        }
        self.samples_with_gc_frames = 0

        # Per-thread statistics
-        self.per_thread_stats = {}  # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}}
+        self.per_thread_stats = {}  # {thread_id: {has_gil, on_cpu, gil_requested, unknown, has_exception, total, gc_samples}}

    def collect(self, stack_frames, skip_idle=False):
        """Override to track thread status statistics before processing frames."""
@ -118,6 +119,7 @@ def collect(self, stack_frames, skip_idle=False):
                    "on_cpu": 0,
                    "gil_requested": 0,
                    "unknown": 0,
+                    "has_exception": 0,
                    "total": 0,
                    "gc_samples": 0,
                }
@ -247,12 +249,16 @@ def convert_children(children, min_samples):
            }

        # Calculate thread status percentages for display
+        import sysconfig
+        is_free_threaded = bool(sysconfig.get_config_var("Py_GIL_DISABLED"))
        total_threads = max(1, self.thread_status_counts["total"])
        thread_stats = {
            "has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100,
            "on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100,
            "gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100,
+            "has_exception_pct": (self.thread_status_counts["has_exception"] / total_threads) * 100,
            "gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100,
+            "free_threaded": is_free_threaded,
            **self.thread_status_counts
        }

@ -265,6 +271,7 @@ def convert_children(children, min_samples):
                "has_gil_pct": (stats["has_gil"] / total) * 100,
                "on_cpu_pct": (stats["on_cpu"] / total) * 100,
                "gil_requested_pct": (stats["gil_requested"] / total) * 100,
+                "has_exception_pct": (stats["has_exception"] / total) * 100,
                "gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100,
                **stats
            }
--- a/Lib/test/test_external_inspection.py
+++ b/Lib/test/test_external_inspection.py
@ -26,11 +26,13 @@
 PROFILING_MODE_CPU = 1
 PROFILING_MODE_GIL = 2
 PROFILING_MODE_ALL = 3
+PROFILING_MODE_EXCEPTION = 4

 # Thread status flags
 THREAD_STATUS_HAS_GIL = 1 << 0
 THREAD_STATUS_ON_CPU = 1 << 1
 THREAD_STATUS_UNKNOWN = 1 << 2
+THREAD_STATUS_HAS_EXCEPTION = 1 << 4

 # Maximum number of retry attempts for operations that may fail transiently
 MAX_TRIES = 10
@ -2260,6 +2262,412 @@ def busy_thread():
            finally:
                _cleanup_sockets(*client_sockets, server_socket)

+    def _make_exception_test_script(self, port):
+        """Create script with exception and normal threads for testing."""
+        return textwrap.dedent(
+            f"""\
+            import socket
+            import threading
+            import time
+
+            def exception_thread():
+                conn = socket.create_connection(("localhost", {port}))
+                conn.sendall(b"exception:" + str(threading.get_native_id()).encode())
+                try:
+                    raise ValueError("test exception")
+                except ValueError:
+                    while True:
+                        time.sleep(0.01)
+
+            def normal_thread():
+                conn = socket.create_connection(("localhost", {port}))
+                conn.sendall(b"normal:" + str(threading.get_native_id()).encode())
+                while True:
+                    sum(range(1000))
+
+            t1 = threading.Thread(target=exception_thread)
+            t2 = threading.Thread(target=normal_thread)
+            t1.start()
+            t2.start()
+            t1.join()
+            t2.join()
+            """
+        )
+
+    @contextmanager
+    def _run_exception_test_process(self):
+        """Context manager to run exception test script and yield thread IDs and process."""
+        port = find_unused_port()
+        script = self._make_exception_test_script(port)
+
+        with os_helper.temp_dir() as tmp_dir:
+            script_file = make_script(tmp_dir, "script", script)
+            server_socket = _create_server_socket(port, backlog=2)
+            client_sockets = []
+
+            try:
+                with _managed_subprocess([sys.executable, script_file]) as p:
+                    exception_tid = None
+                    normal_tid = None
+
+                    for _ in range(2):
+                        client_socket, _ = server_socket.accept()
+                        client_sockets.append(client_socket)
+                        line = client_socket.recv(1024)
+                        if line:
+                            if line.startswith(b"exception:"):
+                                try:
+                                    exception_tid = int(line.split(b":")[-1])
+                                except (ValueError, IndexError):
+                                    pass
+                            elif line.startswith(b"normal:"):
+                                try:
+                                    normal_tid = int(line.split(b":")[-1])
+                                except (ValueError, IndexError):
+                                    pass
+
+                    server_socket.close()
+                    server_socket = None
+
+                    yield p, exception_tid, normal_tid
+            finally:
+                _cleanup_sockets(*client_sockets, server_socket)
+
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(
+        sys.platform == "android", "Android raises Linux-specific exception"
+    )
+    def test_thread_status_exception_detection(self):
+        """Test that THREAD_STATUS_HAS_EXCEPTION is set when thread has an active exception."""
+        with self._run_exception_test_process() as (p, exception_tid, normal_tid):
+            self.assertIsNotNone(exception_tid, "Exception thread id not received")
+            self.assertIsNotNone(normal_tid, "Normal thread id not received")
+
+            statuses = {}
+            try:
+                unwinder = RemoteUnwinder(
+                    p.pid,
+                    all_threads=True,
+                    mode=PROFILING_MODE_ALL,
+                    skip_non_matching_threads=False,
+                )
+                for _ in range(MAX_TRIES):
+                    traces = unwinder.get_stack_trace()
+                    statuses = self._get_thread_statuses(traces)
+
+                    if (
+                        exception_tid in statuses
+                        and normal_tid in statuses
+                        and (statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION)
+                        and not (statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION)
+                    ):
+                        break
+                    time.sleep(0.5)
+            except PermissionError:
+                self.skipTest("Insufficient permissions to read the stack trace")
+
+            self.assertIn(exception_tid, statuses)
+            self.assertIn(normal_tid, statuses)
+            self.assertTrue(
+                statuses[exception_tid] & THREAD_STATUS_HAS_EXCEPTION,
+                "Exception thread should have HAS_EXCEPTION flag",
+            )
+            self.assertFalse(
+                statuses[normal_tid] & THREAD_STATUS_HAS_EXCEPTION,
+                "Normal thread should not have HAS_EXCEPTION flag",
+            )
+
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(
+        sys.platform == "android", "Android raises Linux-specific exception"
+    )
+    def test_thread_status_exception_mode_filtering(self):
+        """Test that PROFILING_MODE_EXCEPTION correctly filters threads."""
+        with self._run_exception_test_process() as (p, exception_tid, normal_tid):
+            self.assertIsNotNone(exception_tid, "Exception thread id not received")
+            self.assertIsNotNone(normal_tid, "Normal thread id not received")
+
+            try:
+                unwinder = RemoteUnwinder(
+                    p.pid,
+                    all_threads=True,
+                    mode=PROFILING_MODE_EXCEPTION,
+                    skip_non_matching_threads=True,
+                )
+                for _ in range(MAX_TRIES):
+                    traces = unwinder.get_stack_trace()
+                    statuses = self._get_thread_statuses(traces)
+
+                    if exception_tid in statuses:
+                        self.assertNotIn(
+                            normal_tid,
+                            statuses,
+                            "Normal thread should be filtered out in exception mode",
+                        )
+                        return
+                    time.sleep(0.5)
+            except PermissionError:
+                self.skipTest("Insufficient permissions to read the stack trace")
+
+            self.fail("Never found exception thread in exception mode")
+
+class TestExceptionDetectionScenarios(RemoteInspectionTestBase):
+    """Test exception detection across all scenarios.
+
+    This class verifies the exact conditions under which THREAD_STATUS_HAS_EXCEPTION
+    is set. Each test covers a specific scenario:
+
+    1. except_block: Thread inside except block
+       -> SHOULD have HAS_EXCEPTION (exc_info->exc_value is set)
+
+    2. finally_propagating: Exception propagating through finally block
+       -> SHOULD have HAS_EXCEPTION (current_exception is set)
+
+    3. finally_after_except: Finally block after except handled exception
+       -> Should NOT have HAS_EXCEPTION (exc_info cleared after except)
+
+    4. finally_no_exception: Finally block with no exception raised
+       -> Should NOT have HAS_EXCEPTION (no exception state)
+    """
+
+    def _make_single_scenario_script(self, port, scenario):
+        """Create script for a single exception scenario."""
+        scenarios = {
+            "except_block": f"""\
+import socket
+import threading
+import time
+
+def target_thread():
+    '''Inside except block - exception info is present'''
+    conn = socket.create_connection(("localhost", {port}))
+    conn.sendall(b"ready:" + str(threading.get_native_id()).encode())
+    try:
+        raise ValueError("test")
+    except ValueError:
+        while True:
+            time.sleep(0.01)
+
+t = threading.Thread(target=target_thread)
+t.start()
+t.join()
+""",
+            "finally_propagating": f"""\
+import socket
+import threading
+import time
+
+def target_thread():
+    '''Exception propagating through finally - current_exception is set'''
+    conn = socket.create_connection(("localhost", {port}))
+    conn.sendall(b"ready:" + str(threading.get_native_id()).encode())
+    try:
+        try:
+            raise ValueError("propagating")
+        finally:
+            # Exception is propagating through here
+            while True:
+                time.sleep(0.01)
+    except:
+        pass  # Never reached due to infinite loop
+
+t = threading.Thread(target=target_thread)
+t.start()
+t.join()
+""",
+            "finally_after_except": f"""\
+import socket
+import threading
+import time
+
+def target_thread():
+    '''Finally runs after except handled - exc_info is cleared'''
+    conn = socket.create_connection(("localhost", {port}))
+    conn.sendall(b"ready:" + str(threading.get_native_id()).encode())
+    try:
+        raise ValueError("test")
+    except ValueError:
+        pass  # Exception caught and handled
+    finally:
+        while True:
+            time.sleep(0.01)
+
+t = threading.Thread(target=target_thread)
+t.start()
+t.join()
+""",
+            "finally_no_exception": f"""\
+import socket
+import threading
+import time
+
+def target_thread():
+    '''Finally with no exception at all'''
+    conn = socket.create_connection(("localhost", {port}))
+    conn.sendall(b"ready:" + str(threading.get_native_id()).encode())
+    try:
+        pass  # No exception
+    finally:
+        while True:
+            time.sleep(0.01)
+
+t = threading.Thread(target=target_thread)
+t.start()
+t.join()
+""",
+        }
+
+        return scenarios[scenario]
+
+    @contextmanager
+    def _run_scenario_process(self, scenario):
+        """Context manager to run a single scenario and yield thread ID and process."""
+        port = find_unused_port()
+        script = self._make_single_scenario_script(port, scenario)
+
+        with os_helper.temp_dir() as tmp_dir:
+            script_file = make_script(tmp_dir, "script", script)
+            server_socket = _create_server_socket(port, backlog=1)
+            client_socket = None
+
+            try:
+                with _managed_subprocess([sys.executable, script_file]) as p:
+                    thread_tid = None
+
+                    client_socket, _ = server_socket.accept()
+                    line = client_socket.recv(1024)
+                    if line and line.startswith(b"ready:"):
+                        try:
+                            thread_tid = int(line.split(b":")[-1])
+                        except (ValueError, IndexError):
+                            pass
+
+                    server_socket.close()
+                    server_socket = None
+
+                    yield p, thread_tid
+            finally:
+                _cleanup_sockets(client_socket, server_socket)
+
+    def _check_exception_status(self, p, thread_tid, expect_exception):
+        """Helper to check if thread has expected exception status."""
+        try:
+            unwinder = RemoteUnwinder(
+                p.pid,
+                all_threads=True,
+                mode=PROFILING_MODE_ALL,
+                skip_non_matching_threads=False,
+            )
+
+            # Collect multiple samples for reliability
+            results = []
+            for _ in range(MAX_TRIES):
+                traces = unwinder.get_stack_trace()
+                statuses = self._get_thread_statuses(traces)
+
+                if thread_tid in statuses:
+                    has_exc = bool(statuses[thread_tid] & THREAD_STATUS_HAS_EXCEPTION)
+                    results.append(has_exc)
+
+                    if len(results) >= 3:
+                        break
+
+                time.sleep(0.2)
+
+            # Check majority of samples match expected
+            if not results:
+                self.fail("Never found target thread in stack traces")
+
+            majority = sum(results) > len(results) // 2
+            if expect_exception:
+                self.assertTrue(
+                    majority,
+                    f"Thread should have HAS_EXCEPTION flag, got {results}"
+                )
+            else:
+                self.assertFalse(
+                    majority,
+                    f"Thread should NOT have HAS_EXCEPTION flag, got {results}"
+                )
+
+        except PermissionError:
+            self.skipTest("Insufficient permissions to read the stack trace")
+
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(
+        sys.platform == "android", "Android raises Linux-specific exception"
+    )
+    def test_except_block_has_exception(self):
+        """Test that thread inside except block has HAS_EXCEPTION flag.
+
+        When a thread is executing inside an except block, exc_info->exc_value
+        is set, so THREAD_STATUS_HAS_EXCEPTION should be True.
+        """
+        with self._run_scenario_process("except_block") as (p, thread_tid):
+            self.assertIsNotNone(thread_tid, "Thread ID not received")
+            self._check_exception_status(p, thread_tid, expect_exception=True)
+
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(
+        sys.platform == "android", "Android raises Linux-specific exception"
+    )
+    def test_finally_propagating_has_exception(self):
+        """Test that finally block with propagating exception has HAS_EXCEPTION flag.
+
+        When an exception is propagating through a finally block (not yet caught),
+        current_exception is set, so THREAD_STATUS_HAS_EXCEPTION should be True.
+        """
+        with self._run_scenario_process("finally_propagating") as (p, thread_tid):
+            self.assertIsNotNone(thread_tid, "Thread ID not received")
+            self._check_exception_status(p, thread_tid, expect_exception=True)
+
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(
+        sys.platform == "android", "Android raises Linux-specific exception"
+    )
+    def test_finally_after_except_no_exception(self):
+        """Test that finally block after except has NO HAS_EXCEPTION flag.
+
+        When a finally block runs after an except block has handled the exception,
+        Python clears exc_info before entering finally, so THREAD_STATUS_HAS_EXCEPTION
+        should be False.
+        """
+        with self._run_scenario_process("finally_after_except") as (p, thread_tid):
+            self.assertIsNotNone(thread_tid, "Thread ID not received")
+            self._check_exception_status(p, thread_tid, expect_exception=False)
+
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(
+        sys.platform == "android", "Android raises Linux-specific exception"
+    )
+    def test_finally_no_exception_no_flag(self):
+        """Test that finally block with no exception has NO HAS_EXCEPTION flag.
+
+        When a finally block runs during normal execution (no exception raised),
+        there is no exception state, so THREAD_STATUS_HAS_EXCEPTION should be False.
+        """
+        with self._run_scenario_process("finally_no_exception") as (p, thread_tid):
+            self.assertIsNotNone(thread_tid, "Thread ID not received")
+            self._check_exception_status(p, thread_tid, expect_exception=False)
+

 class TestFrameCaching(RemoteInspectionTestBase):
    """Test that frame caching produces correct results.
--- a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
+++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py
@ -427,7 +427,198 @@ def test_parse_mode_function(self):
        self.assertEqual(_parse_mode("wall"), 0)
        self.assertEqual(_parse_mode("cpu"), 1)
        self.assertEqual(_parse_mode("gil"), 2)
+        self.assertEqual(_parse_mode("exception"), 4)

        # Test invalid mode raises KeyError
        with self.assertRaises(KeyError):
            _parse_mode("invalid")
+
+
+class TestExceptionModeFiltering(unittest.TestCase):
+    """Test exception mode filtering functionality (--mode=exception)."""
+
+    def test_exception_mode_validation(self):
+        """Test that CLI accepts exception mode choice correctly."""
+        from profiling.sampling.cli import main
+
+        test_args = [
+            "profiling.sampling.cli",
+            "attach",
+            "12345",
+            "--mode",
+            "exception",
+        ]
+
+        with (
+            mock.patch("sys.argv", test_args),
+            mock.patch("profiling.sampling.cli.sample") as mock_sample,
+        ):
+            try:
+                main()
+            except (SystemExit, OSError, RuntimeError):
+                pass  # Expected due to invalid PID
+
+        # Should have attempted to call sample with mode=4 (exception mode)
+        mock_sample.assert_called_once()
+        call_args = mock_sample.call_args
+        # Check the mode parameter (should be in kwargs)
+        self.assertEqual(call_args.kwargs.get("mode"), 4)  # PROFILING_MODE_EXCEPTION
+
+    def test_exception_mode_sample_function_call(self):
+        """Test that sample() function correctly uses exception mode."""
+        with (
+            mock.patch(
+                "profiling.sampling.sample.SampleProfiler"
+            ) as mock_profiler,
+        ):
+            # Mock the profiler instance
+            mock_instance = mock.Mock()
+            mock_profiler.return_value = mock_instance
+
+            # Create a real collector instance
+            collector = PstatsCollector(sample_interval_usec=1000, skip_idle=True)
+
+            # Call sample with exception mode
+            profiling.sampling.sample.sample(
+                12345,
+                collector,
+                mode=4,  # PROFILING_MODE_EXCEPTION
+                duration_sec=1,
+            )
+
+            # Verify SampleProfiler was created with correct mode
+            mock_profiler.assert_called_once()
+            call_args = mock_profiler.call_args
+            self.assertEqual(call_args[1]["mode"], 4)  # mode parameter
+
+            # Verify profiler.sample was called
+            mock_instance.sample.assert_called_once()
+
+    def test_exception_mode_cli_argument_parsing(self):
+        """Test CLI argument parsing for exception mode with various options."""
+        from profiling.sampling.cli import main
+
+        test_args = [
+            "profiling.sampling.cli",
+            "attach",
+            "12345",
+            "--mode",
+            "exception",
+            "-i",
+            "500",
+            "-d",
+            "5",
+        ]
+
+        with (
+            mock.patch("sys.argv", test_args),
+            mock.patch("profiling.sampling.cli.sample") as mock_sample,
+        ):
+            try:
+                main()
+            except (SystemExit, OSError, RuntimeError):
+                pass  # Expected due to invalid PID
+
+        # Verify all arguments were parsed correctly
+        mock_sample.assert_called_once()
+        call_args = mock_sample.call_args
+        self.assertEqual(call_args.kwargs.get("mode"), 4)  # exception mode
+        self.assertEqual(call_args.kwargs.get("duration_sec"), 5)
+
+    def test_exception_mode_constants_are_defined(self):
+        """Test that exception mode constant is properly defined."""
+        from profiling.sampling.constants import PROFILING_MODE_EXCEPTION
+        self.assertEqual(PROFILING_MODE_EXCEPTION, 4)
+
+    @requires_subprocess()
+    def test_exception_mode_integration_filtering(self):
+        """Integration test: Exception mode should only capture threads with active exceptions."""
+        # Script with one thread handling an exception and one normal thread
+        exception_vs_normal_script = """
+import time
+import threading
+
+exception_ready = threading.Event()
+
+def normal_worker():
+    x = 0
+    while True:
+        x += 1
+
+def exception_handling_worker():
+    try:
+        raise ValueError("test exception")
+    except ValueError:
+        # Signal AFTER entering except block, then do CPU work
+        exception_ready.set()
+        x = 0
+        while True:
+            x += 1
+
+normal_thread = threading.Thread(target=normal_worker)
+exception_thread = threading.Thread(target=exception_handling_worker)
+normal_thread.start()
+exception_thread.start()
+exception_ready.wait()
+_test_sock.sendall(b"working")
+normal_thread.join()
+exception_thread.join()
+"""
+        with test_subprocess(exception_vs_normal_script, wait_for_working=True) as subproc:
+
+            with (
+                io.StringIO() as captured_output,
+                mock.patch("sys.stdout", captured_output),
+            ):
+                try:
+                    collector = PstatsCollector(sample_interval_usec=5000, skip_idle=True)
+                    profiling.sampling.sample.sample(
+                        subproc.process.pid,
+                        collector,
+                        duration_sec=2.0,
+                        mode=4,  # Exception mode
+                        all_threads=True,
+                    )
+                    collector.print_stats(show_summary=False, mode=4)
+                except (PermissionError, RuntimeError) as e:
+                    self.skipTest(
+                        "Insufficient permissions for remote profiling"
+                    )
+
+                exception_mode_output = captured_output.getvalue()
+
+            # Test wall-clock mode (mode=0) - should capture both functions
+            with (
+                io.StringIO() as captured_output,
+                mock.patch("sys.stdout", captured_output),
+            ):
+                try:
+                    collector = PstatsCollector(sample_interval_usec=5000, skip_idle=False)
+                    profiling.sampling.sample.sample(
+                        subproc.process.pid,
+                        collector,
+                        duration_sec=2.0,
+                        mode=0,  # Wall-clock mode
+                        all_threads=True,
+                    )
+                    collector.print_stats(show_summary=False)
+                except (PermissionError, RuntimeError) as e:
+                    self.skipTest(
+                        "Insufficient permissions for remote profiling"
+                    )
+
+                wall_mode_output = captured_output.getvalue()
+
+            # Verify both modes captured samples
+            self.assertIn("Captured", exception_mode_output)
+            self.assertIn("samples", exception_mode_output)
+            self.assertIn("Captured", wall_mode_output)
+            self.assertIn("samples", wall_mode_output)
+
+            # Exception mode should strongly favor exception_handling_worker over normal_worker
+            self.assertIn("exception_handling_worker", exception_mode_output)
+            self.assertNotIn("normal_worker", exception_mode_output)
+
+            # Wall-clock mode should capture both types of work
+            self.assertIn("exception_handling_worker", wall_mode_output)
+            self.assertIn("normal_worker", wall_mode_output)
--- a/Misc/NEWS.d/next/Library/2025-12-11-04-18-49.gh-issue-138122.m3EF9E.rst
+++ b/Misc/NEWS.d/next/Library/2025-12-11-04-18-49.gh-issue-138122.m3EF9E.rst
@ -0,0 +1,3 @@
+Add ``--mode=exception`` to the sampling profiler to capture samples only from
+threads with an active exception, useful for analyzing exception handling
+overhead. Patch by Pablo Galindo.
--- a/Modules/_remote_debugging/_remote_debugging.h
+++ b/Modules/_remote_debugging/_remote_debugging.h
@ -109,10 +109,11 @@ typedef enum _WIN32_THREADSTATE {
 #define MAX_TLBC_SIZE 2048

 /* Thread status flags */
-#define THREAD_STATUS_HAS_GIL        (1 << 0)
-#define THREAD_STATUS_ON_CPU         (1 << 1)
-#define THREAD_STATUS_UNKNOWN        (1 << 2)
-#define THREAD_STATUS_GIL_REQUESTED  (1 << 3)
+#define THREAD_STATUS_HAS_GIL             (1 << 0)
+#define THREAD_STATUS_ON_CPU              (1 << 1)
+#define THREAD_STATUS_UNKNOWN             (1 << 2)
+#define THREAD_STATUS_GIL_REQUESTED       (1 << 3)
+#define THREAD_STATUS_HAS_EXCEPTION       (1 << 4)

 /* Exception cause macro */
 #define set_exception_cause(unwinder, exc_type, message) \
@ -209,7 +210,8 @@ enum _ProfilingMode {
    PROFILING_MODE_WALL = 0,
    PROFILING_MODE_CPU = 1,
    PROFILING_MODE_GIL = 2,
-    PROFILING_MODE_ALL = 3
+    PROFILING_MODE_ALL = 3,
+    PROFILING_MODE_EXCEPTION = 4
 };

 typedef struct {
--- a/Modules/_remote_debugging/module.c
+++ b/Modules/_remote_debugging/module.c
@ -568,7 +568,8 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
                                                           gc_frame);
            if (!frame_info) {
                // Check if this was an intentional skip due to mode-based filtering
-                if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) {
+                if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL ||
+                     self->mode == PROFILING_MODE_EXCEPTION) && !PyErr_Occurred()) {
                    // Thread was skipped due to mode filtering, continue to next thread
                    continue;
                }
@ -1068,6 +1069,9 @@ _remote_debugging_exec(PyObject *m)
    if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) {
        return -1;
    }
+    if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_EXCEPTION", THREAD_STATUS_HAS_EXCEPTION) < 0) {
+        return -1;
+    }

    if (RemoteDebugging_InitState(st) < 0) {
        return -1;
--- a/Modules/_remote_debugging/threads.c
+++ b/Modules/_remote_debugging/threads.c
@ -344,6 +344,33 @@ unwind_stack_for_thread(
        gil_requested = 0;
    }

+    // Check exception state (both raised and handled exceptions)
+    int has_exception = 0;
+
+    // Check current_exception (exception being raised/propagated)
+    uintptr_t current_exception = GET_MEMBER(uintptr_t, ts,
+        unwinder->debug_offsets.thread_state.current_exception);
+    if (current_exception != 0) {
+        has_exception = 1;
+    }
+
+    // Check exc_state.exc_value (exception being handled in except block)
+    // exc_state is embedded in PyThreadState, so we read it directly from
+    // the thread state buffer. This catches most cases; nested exception
+    // handlers where exc_info points elsewhere are rare.
+    if (!has_exception) {
+        uintptr_t exc_value = GET_MEMBER(uintptr_t, ts,
+            unwinder->debug_offsets.thread_state.exc_state +
+            unwinder->debug_offsets.err_stackitem.exc_value);
+        if (exc_value != 0) {
+            has_exception = 1;
+        }
+    }
+
+    if (has_exception) {
+        status_flags |= THREAD_STATUS_HAS_EXCEPTION;
+    }
+
    // Check CPU status
    long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);

@ -368,6 +395,9 @@ unwind_stack_for_thread(
        } else if (unwinder->mode == PROFILING_MODE_GIL) {
            // Skip if doesn't have GIL
            should_skip = !(status_flags & THREAD_STATUS_HAS_GIL);
+        } else if (unwinder->mode == PROFILING_MODE_EXCEPTION) {
+            // Skip if thread doesn't have an exception active
+            should_skip = !(status_flags & THREAD_STATUS_HAS_EXCEPTION);
        }
        // PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip
    }