diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1086b426204..6acc156ebff 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -126,6 +126,9 @@ Doc/howto/clinic.rst @erlend-aasland @AA-Turner # C Analyser Tools/c-analyzer/ @ericsnowcurrently +# C API Documentation Checks +Tools/check-c-api-docs/ @ZeroIntensity + # Fuzzing Modules/_xxtestfuzz/ @ammaraskar diff --git a/.github/CONTRIBUTING.rst b/.github/CONTRIBUTING.rst index 224061f2c5a..94b67ce3dbe 100644 --- a/.github/CONTRIBUTING.rst +++ b/.github/CONTRIBUTING.rst @@ -28,13 +28,12 @@ Please be aware that our workflow does deviate slightly from the typical GitHub project. Details on how to properly submit a pull request are covered in `Lifecycle of a Pull Request `_. We utilize various bots and status checks to help with this, so do follow the -comments they leave and their "Details" links, respectively. The key points of -our workflow that are not covered by a bot or status check are: +comments they leave and their "Details" links, respectively. -- All discussions that are not directly related to the code in the pull request - should happen on `GitHub Issues `_. -- Upon your first non-trivial pull request (which includes documentation changes), - feel free to add yourself to ``Misc/ACKS``. +The final key part of our workflow is that all discussions that are not +directly related to the code in the pull request should happen on +`GitHub Issues `__, generally in the +pull request's parent issue. Setting Expectations diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 75d174307ce..de6e8756b03 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -5,3 +5,6 @@ contact_links: - name: "Proposing new features" about: "Submit major feature proposal (e.g. syntax changes) to an ideas forum first." url: "https://discuss.python.org/c/ideas/6" + - name: "Python Install Manager issues" + about: "Report issues with the Python Install Manager (for Windows)" + url: "https://github.com/python/pymanager/issues" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c8a3165d690..7f3376f8ddb 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,6 +12,11 @@ updates: update-types: - "version-update:semver-minor" - "version-update:semver-patch" + cooldown: + # https://blog.yossarian.net/2025/11/21/We-should-all-be-using-dependency-cooldowns + # Cooldowns protect against supply chain attacks by avoiding the + # highest-risk window immediately after new releases. 
+ default-days: 14 - package-ecosystem: "pip" directory: "/Tools/" schedule: @@ -19,3 +24,5 @@ updates: labels: - "skip issue" - "skip news" + cooldown: + default-days: 14 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8e15400e497..3d889fa128e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -142,6 +142,9 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + - name: Check for undocumented C APIs + run: make check-c-api-docs + build-windows: name: >- diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index a309ef4e7f4..91d76fd1b5f 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -13,7 +13,7 @@ jobs: timeout-minutes: 60 env: WASMTIME_VERSION: 38.0.3 - WASI_SDK_VERSION: 25 + WASI_SDK_VERSION: 29 WASI_SDK_PATH: /opt/wasi-sdk CROSS_BUILD_PYTHON: cross-build/build CROSS_BUILD_WASI: cross-build/wasm32-wasip1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0311f05279..c5767ee841e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,10 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.13.2 hooks: + - id: ruff-check + name: Run Ruff (lint) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple/ - id: ruff-check name: Run Ruff (lint) on Doc/ args: [--exit-non-zero-on-fix] @@ -30,6 +34,10 @@ repos: name: Run Ruff (lint) on Tools/wasm/ args: [--exit-non-zero-on-fix, --config=Tools/wasm/.ruff.toml] files: ^Tools/wasm/ + - id: ruff-format + name: Run Ruff (format) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple - id: ruff-format name: Run Ruff (format) on Doc/ args: [--check] diff --git a/Android/android.py b/Android/android.py index 25bb4ca70b5..d1a10be776e 100755 --- a/Android/android.py +++ b/Android/android.py @@ -29,6 +29,7 @@ ANDROID_DIR.name == "Android" and (PYTHON_DIR / "pyconfig.h.in").exists() ) +ENV_SCRIPT = ANDROID_DIR / "android-env.sh" TESTBED_DIR = ANDROID_DIR / "testbed" CROSS_BUILD_DIR = PYTHON_DIR / "cross-build" @@ -129,12 +130,11 @@ def android_env(host): sysconfig_filename = next(sysconfig_files).name host = re.fullmatch(r"_sysconfigdata__android_(.+).py", sysconfig_filename)[1] - env_script = ANDROID_DIR / "android-env.sh" env_output = subprocess.run( f"set -eu; " f"HOST={host}; " f"PREFIX={prefix}; " - f". {env_script}; " + f". 
{ENV_SCRIPT}; " f"export", check=True, shell=True, capture_output=True, encoding='utf-8', ).stdout @@ -151,7 +151,7 @@ def android_env(host): env[key] = value if not env: - raise ValueError(f"Found no variables in {env_script.name} output:\n" + raise ValueError(f"Found no variables in {ENV_SCRIPT.name} output:\n" + env_output) return env @@ -281,15 +281,30 @@ def clean_all(context): def setup_ci(): - # https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/ - if "GITHUB_ACTIONS" in os.environ and platform.system() == "Linux": - run( - ["sudo", "tee", "/etc/udev/rules.d/99-kvm4all.rules"], - input='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"\n', - text=True, - ) - run(["sudo", "udevadm", "control", "--reload-rules"]) - run(["sudo", "udevadm", "trigger", "--name-match=kvm"]) + if "GITHUB_ACTIONS" in os.environ: + # Enable emulator hardware acceleration + # (https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/). + if platform.system() == "Linux": + run( + ["sudo", "tee", "/etc/udev/rules.d/99-kvm4all.rules"], + input='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"\n', + text=True, + ) + run(["sudo", "udevadm", "control", "--reload-rules"]) + run(["sudo", "udevadm", "trigger", "--name-match=kvm"]) + + # Free up disk space by deleting unused versions of the NDK + # (https://github.com/freakboy3742/pyspamsum/pull/108). + for line in ENV_SCRIPT.read_text().splitlines(): + if match := re.fullmatch(r"ndk_version=(.+)", line): + ndk_version = match[1] + break + else: + raise ValueError(f"Failed to find NDK version in {ENV_SCRIPT.name}") + + for item in (android_home / "ndk").iterdir(): + if item.name[0].isdigit() and item.name != ndk_version: + delete_glob(item) def setup_sdk(): diff --git a/Android/testbed/app/build.gradle.kts b/Android/testbed/app/build.gradle.kts index 4de628a279c..14d43d8c4d5 100644 --- a/Android/testbed/app/build.gradle.kts +++ b/Android/testbed/app/build.gradle.kts @@ -79,7 +79,7 @@ android { val androidEnvFile = file("../../android-env.sh").absoluteFile namespace = "org.python.testbed" - compileSdk = 34 + compileSdk = 35 defaultConfig { applicationId = "org.python.testbed" @@ -92,7 +92,7 @@ android { } throw GradleException("Failed to find API level in $androidEnvFile") } - targetSdk = 34 + targetSdk = 35 versionCode = 1 versionName = "1.0" diff --git a/Apple/.ruff.toml b/Apple/.ruff.toml new file mode 100644 index 00000000000..4cdc39ebee4 --- /dev/null +++ b/Apple/.ruff.toml @@ -0,0 +1,22 @@ +extend = "../.ruff.toml" # Inherit the project-wide settings + +[format] +preview = true +docstring-code-format = true + +[lint] +select = [ + "C4", # flake8-comprehensions + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RUF100", # Ban unused `# noqa` comments + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] diff --git a/Apple/__main__.py b/Apple/__main__.py index e76fc351798..256966e76c2 100644 --- a/Apple/__main__.py +++ b/Apple/__main__.py @@ -46,13 +46,12 @@ import sys import sysconfig import time -from collections.abc import Sequence +from collections.abc import Callable, Sequence from contextlib import contextmanager from datetime import datetime, timezone from os.path import basename, relpath from pathlib import Path from subprocess import 
CalledProcessError -from typing import Callable EnvironmentT = dict[str, str] ArgsT = Sequence[str | Path] @@ -140,17 +139,15 @@ def print_env(env: EnvironmentT) -> None: def apple_env(host: str) -> EnvironmentT: """Construct an Apple development environment for the given host.""" env = { - "PATH": ":".join( - [ - str(PYTHON_DIR / "Apple/iOS/Resources/bin"), - str(subdir(host) / "prefix"), - "/usr/bin", - "/bin", - "/usr/sbin", - "/sbin", - "/Library/Apple/usr/bin", - ] - ), + "PATH": ":".join([ + str(PYTHON_DIR / "Apple/iOS/Resources/bin"), + str(subdir(host) / "prefix"), + "/usr/bin", + "/bin", + "/usr/sbin", + "/sbin", + "/Library/Apple/usr/bin", + ]), } return env @@ -196,14 +193,10 @@ def clean(context: argparse.Namespace, target: str = "all") -> None: paths.append(target) if target in {"all", "hosts", "test"}: - paths.extend( - [ - path.name - for path in CROSS_BUILD_DIR.glob( - f"{context.platform}-testbed.*" - ) - ] - ) + paths.extend([ + path.name + for path in CROSS_BUILD_DIR.glob(f"{context.platform}-testbed.*") + ]) for path in paths: delete_path(path) @@ -352,18 +345,16 @@ def download(url: str, target_dir: Path) -> Path: out_path = target_path / basename(url) if not Path(out_path).is_file(): - run( - [ - "curl", - "-Lf", - "--retry", - "5", - "--retry-all-errors", - "-o", - out_path, - url, - ] - ) + run([ + "curl", + "-Lf", + "--retry", + "5", + "--retry-all-errors", + "-o", + out_path, + url, + ]) else: print(f"Using cached version of {basename(url)}") return out_path @@ -468,8 +459,7 @@ def package_version(prefix_path: Path) -> str: def lib_platform_files(dirname, names): - """A file filter that ignores platform-specific files in the lib directory. - """ + """A file filter that ignores platform-specific files in lib.""" path = Path(dirname) if ( path.parts[-3] == "lib" @@ -478,7 +468,7 @@ def lib_platform_files(dirname, names): ): return names elif path.parts[-2] == "lib" and path.parts[-1].startswith("python"): - ignored_names = set( + ignored_names = { name for name in names if ( @@ -486,7 +476,13 @@ def lib_platform_files(dirname, names): or name.startswith("_sysconfig_vars_") or name == "build-details.json" ) - ) + } + elif path.parts[-1] == "lib": + ignored_names = { + name + for name in names + if name.startswith("libpython") and name.endswith(".dylib") + } else: ignored_names = set() @@ -499,7 +495,9 @@ def lib_non_platform_files(dirname, names): """ path = Path(dirname) if path.parts[-2] == "lib" and path.parts[-1].startswith("python"): - return set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + return ( + set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + ) else: return set() @@ -514,7 +512,8 @@ def create_xcframework(platform: str) -> str: package_path.mkdir() except FileExistsError: raise RuntimeError( - f"{platform} XCframework already exists; do you need to run with --clean?" + f"{platform} XCframework already exists; do you need to run " + "with --clean?" 
) from None frameworks = [] @@ -607,7 +606,7 @@ def create_xcframework(platform: str) -> str: print(f" - {slice_name} binaries") shutil.copytree(first_path / "bin", slice_path / "bin") - # Copy the include path (this will be a symlink to the framework headers) + # Copy the include path (a symlink to the framework headers) print(f" - {slice_name} include files") shutil.copytree( first_path / "include", @@ -621,6 +620,12 @@ def create_xcframework(platform: str) -> str: slice_framework / "Headers/pyconfig.h", ) + print(f" - {slice_name} shared library") + # Create a symlink for the fat library + shared_lib = slice_path / f"lib/libpython{version_tag}.dylib" + shared_lib.parent.mkdir() + shared_lib.symlink_to("../Python.framework/Python") + print(f" - {slice_name} architecture-specific files") for host_triple, multiarch in slice_parts.items(): print(f" - {multiarch} standard library") @@ -632,6 +637,7 @@ def create_xcframework(platform: str) -> str: framework_path(host_triple, multiarch) / "lib", package_path / "Python.xcframework/lib", ignore=lib_platform_files, + symlinks=True, ) has_common_stdlib = True @@ -639,6 +645,7 @@ def create_xcframework(platform: str) -> str: framework_path(host_triple, multiarch) / "lib", slice_path / f"lib-{arch}", ignore=lib_non_platform_files, + symlinks=True, ) # Copy the host's pyconfig.h to an architecture-specific name. @@ -659,7 +666,8 @@ def create_xcframework(platform: str) -> str: # statically link those libraries into a Framework, you become # responsible for providing a privacy manifest for that framework. xcprivacy_file = { - "OpenSSL": subdir(host_triple) / "prefix/share/OpenSSL.xcprivacy" + "OpenSSL": subdir(host_triple) + / "prefix/share/OpenSSL.xcprivacy" } print(f" - {multiarch} xcprivacy files") for module, lib in [ @@ -669,7 +677,8 @@ def create_xcframework(platform: str) -> str: shutil.copy( xcprivacy_file[lib], slice_path - / f"lib-{arch}/python{version_tag}/lib-dynload/{module}.xcprivacy", + / f"lib-{arch}/python{version_tag}" + / f"lib-dynload/{module}.xcprivacy", ) print(" - build tools") @@ -692,18 +701,16 @@ def package(context: argparse.Namespace) -> None: # Clone testbed print() - run( - [ - sys.executable, - "Apple/testbed", - "clone", - "--platform", - context.platform, - "--framework", - CROSS_BUILD_DIR / context.platform / "Python.xcframework", - CROSS_BUILD_DIR / context.platform / "testbed", - ] - ) + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + CROSS_BUILD_DIR / context.platform / "Python.xcframework", + CROSS_BUILD_DIR / context.platform / "testbed", + ]) # Build the final archive archive_name = ( @@ -757,7 +764,7 @@ def build(context: argparse.Namespace, host: str | None = None) -> None: package(context) -def test(context: argparse.Namespace, host: str | None = None) -> None: +def test(context: argparse.Namespace, host: str | None = None) -> None: # noqa: PT028 """The implementation of the "test" command.""" if host is None: host = context.host @@ -795,18 +802,16 @@ def test(context: argparse.Namespace, host: str | None = None) -> None: / f"Frameworks/{apple_multiarch(host)}" ) - run( - [ - sys.executable, - "Apple/testbed", - "clone", - "--platform", - context.platform, - "--framework", - framework_path, - testbed_dir, - ] - ) + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + framework_path, + testbed_dir, + ]) run( [ @@ -840,7 +845,7 @@ def apple_sim_host(platform_name: str) -> str: """Determine the
native simulator target for this platform.""" for _, slice_parts in HOSTS[platform_name].items(): for host_triple in slice_parts: - parts = host_triple.split('-') + parts = host_triple.split("-") if parts[0] == platform.machine() and parts[-1] == "simulator": return host_triple @@ -968,20 +973,29 @@ def parse_args() -> argparse.Namespace: cmd.add_argument( "--simulator", help=( - "The name of the simulator to use (eg: 'iPhone 16e'). Defaults to " - "the most recently released 'entry level' iPhone device. Device " - "architecture and OS version can also be specified; e.g., " - "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would run on " - "an ARM64 iPhone 16 Pro simulator running iOS 26.0." + "The name of the simulator to use (eg: 'iPhone 16e'). " + "Defaults to the most recently released 'entry level' " + "iPhone device. Device architecture and OS version can also " + "be specified; e.g., " + "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would " + "run on an ARM64 iPhone 16 Pro simulator running iOS 26.0." ), ) group = cmd.add_mutually_exclusive_group() group.add_argument( - "--fast-ci", action="store_const", dest="ci_mode", const="fast", - help="Add test arguments for GitHub Actions") + "--fast-ci", + action="store_const", + dest="ci_mode", + const="fast", + help="Add test arguments for GitHub Actions", + ) group.add_argument( - "--slow-ci", action="store_const", dest="ci_mode", const="slow", - help="Add test arguments for buildbots") + "--slow-ci", + action="store_const", + dest="ci_mode", + const="slow", + help="Add test arguments for buildbots", + ) for subcommand in [configure_build, configure_host, build, ci]: subcommand.add_argument( diff --git a/Apple/testbed/Python.xcframework/build/utils.sh b/Apple/testbed/Python.xcframework/build/utils.sh index 961c46d014b..e7155d8b30e 100755 --- a/Apple/testbed/Python.xcframework/build/utils.sh +++ b/Apple/testbed/Python.xcframework/build/utils.sh @@ -46,7 +46,8 @@ install_stdlib() { rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" rsync -au "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib-$ARCHS/" "$CODESIGNING_FOLDER_PATH/python/lib/" else - rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" + # A single-arch framework will have a libpython symlink; that can't be included at runtime + rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" --exclude 'libpython*.dylib' fi } diff --git a/Apple/testbed/__main__.py b/Apple/testbed/__main__.py index 49974cb1428..0dd77ab8b82 100644 --- a/Apple/testbed/__main__.py +++ b/Apple/testbed/__main__.py @@ -32,15 +32,15 @@ def select_simulator_device(platform): json_data = json.loads(raw_json) if platform == "iOS": - # Any iOS device will do; we'll look for "SE" devices - but the name isn't - # consistent over time. Older Xcode versions will use "iPhone SE (Nth - # generation)"; As of 2025, they've started using "iPhone 16e". + # Any iOS device will do; we'll look for "SE" devices - but the name + # isn't consistent over time. Older Xcode versions will use "iPhone SE + # (Nth generation)"; As of 2025, they've started using "iPhone 16e". # - # When Xcode is updated after a new release, new devices will be available - # and old ones will be dropped from the set available on the latest iOS - # version. 
Select the one with the highest minimum runtime version - this - # is an indicator of the "newest" released device, which should always be - # supported on the "most recent" iOS version. + # When Xcode is updated after a new release, new devices will be + # available and old ones will be dropped from the set available on the + # latest iOS version. Select the one with the highest minimum runtime + # version - this is an indicator of the "newest" released device, which + # should always be supported on the "most recent" iOS version. se_simulators = sorted( (devicetype["minRuntimeVersion"], devicetype["name"]) for devicetype in json_data["devicetypes"] @@ -295,7 +295,8 @@ def main(): parser = argparse.ArgumentParser( description=( - "Manages the process of testing an Apple Python project through Xcode." + "Manages the process of testing an Apple Python project " + "through Xcode." ), ) @@ -336,7 +337,10 @@ def main(): run = subcommands.add_parser( "run", - usage="%(prog)s [-h] [--simulator SIMULATOR] -- [ ...]", + usage=( + "%(prog)s [-h] [--simulator SIMULATOR] -- " + " [ ...]" + ), description=( "Run a testbed project. The arguments provided after `--` will be " "passed to the running iOS process as if they were arguments to " @@ -397,9 +401,9 @@ def main(): / "bin" ).is_dir(): print( - f"Testbed does not contain a compiled Python framework. Use " - f"`python {sys.argv[0]} clone ...` to create a runnable " - f"clone of this testbed." + "Testbed does not contain a compiled Python framework. " + f"Use `python {sys.argv[0]} clone ...` to create a " + "runnable clone of this testbed." ) sys.exit(20) @@ -411,7 +415,8 @@ def main(): ) else: print( - f"Must specify test arguments (e.g., {sys.argv[0]} run -- test)" + "Must specify test arguments " + f"(e.g., {sys.argv[0]} run -- test)" ) print() parser.print_help(sys.stderr) diff --git a/Doc/about.rst b/Doc/about.rst index 8f635d7f743..5c1b497ca6b 100644 --- a/Doc/about.rst +++ b/Doc/about.rst @@ -32,8 +32,9 @@ Contributors to the Python documentation ---------------------------------------- Many people have contributed to the Python language, the Python standard -library, and the Python documentation. See :source:`Misc/ACKS` in the Python -source distribution for a partial list of contributors. +library, and the Python documentation. See the `CPython +GitHub repository `__ +for a partial list of contributors. It is only with the input and contributions of the Python community that Python has such wonderful documentation -- Thank You! diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index a18bbf4e0e3..96078d22710 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -162,16 +162,33 @@ The following functions provide locale-independent string to number conversions. .. versionadded:: 3.1 -.. c:function:: int PyOS_stricmp(const char *s1, const char *s2) +.. c:function:: int PyOS_mystricmp(const char *str1, const char *str2) + int PyOS_mystrnicmp(const char *str1, const char *str2, Py_ssize_t size) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strcmp` except that it ignores the case. + Case insensitive comparison of strings. These functions work almost + identically to :c:func:`!strcmp` and :c:func:`!strncmp` (respectively), + except that they ignore the case of ASCII characters. + + Return ``0`` if the strings are equal, a negative value if *str1* sorts + lexicographically before *str2*, or a positive value if it sorts after. 
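+ For example, a minimal sketch (with illustrative values)::
+
+    /* Evaluates to 0: the strings differ only in ASCII case. */
+    int same = PyOS_mystricmp("UTF-8", "utf-8");
+
+    /* Compares at most the first 3 bytes ("UTF" vs. "utf"), so also 0. */
+    int prefix = PyOS_mystrnicmp("UTF-8 text", "utf-8!!!", 3);
+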
+ In the *str1* or *str2* arguments, a NUL byte marks the end of the string. + For :c:func:`!PyOS_mystrnicmp`, the *size* argument gives the maximum size + of the string, as if a NUL byte were present at the index given by *size*. + + These functions do not use the locale. -.. c:function:: int PyOS_strnicmp(const char *s1, const char *s2, Py_ssize_t size) +.. c:function:: int PyOS_stricmp(const char *str1, const char *str2) + int PyOS_strnicmp(const char *str1, const char *str2, Py_ssize_t size) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strncmp` except that it ignores the case. + Case insensitive comparison of strings. + + On Windows, these are aliases of :c:func:`!stricmp` and :c:func:`!strnicmp`, + respectively. + + On other platforms, they are aliases of :c:func:`PyOS_mystricmp` and + :c:func:`PyOS_mystrnicmp`, respectively. Character classification and conversion diff --git a/Doc/c-api/datetime.rst b/Doc/c-api/datetime.rst index f311aad5f15..127d7c9c91a 100644 --- a/Doc/c-api/datetime.rst +++ b/Doc/c-api/datetime.rst @@ -8,11 +8,42 @@ DateTime Objects Various date and time objects are supplied by the :mod:`datetime` module. Before using any of these functions, the header file :file:`datetime.h` must be included in your source (note that this is not included by :file:`Python.h`), -and the macro :c:macro:`!PyDateTime_IMPORT` must be invoked, usually as part of +and the macro :c:macro:`PyDateTime_IMPORT` must be invoked, usually as part of the module initialisation function. The macro puts a pointer to a C structure -into a static variable, :c:data:`!PyDateTimeAPI`, that is used by the following +into a static variable, :c:data:`PyDateTimeAPI`, that is used by the following macros. +.. c:macro:: PyDateTime_IMPORT + + Import the datetime C API. + + On success, populate the :c:var:`PyDateTimeAPI` pointer. + On failure, set :c:var:`PyDateTimeAPI` to ``NULL`` and set an exception. + The caller must check if an error occurred via :c:func:`PyErr_Occurred`: + + .. code-block:: c + + PyDateTime_IMPORT; + if (PyErr_Occurred()) { /* cleanup */ } + + .. warning:: + + This is not compatible with subinterpreters. + +.. c:type:: PyDateTime_CAPI + + Structure containing the fields for the datetime C API. + + The fields of this structure are private and subject to change. + + Do not use this directly; prefer ``PyDateTime_*`` APIs instead. + +.. c:var:: PyDateTime_CAPI *PyDateTimeAPI + + Dynamically allocated object containing the datetime C API. + + This variable is only available once :c:macro:`PyDateTime_IMPORT` succeeds. + .. c:type:: PyDateTime_Date This subtype of :c:type:`PyObject` represents a Python date object. @@ -325,3 +356,16 @@ Macros for the convenience of modules implementing the DB API: Create and return a new :class:`datetime.date` object given an argument tuple suitable for passing to :meth:`datetime.date.fromtimestamp`. + + +Internal data +------------- + +The following symbols are exposed by the C API but should be considered +internal-only. + +.. c:macro:: PyDateTime_CAPSULE_NAME + + Name of the datetime capsule to pass to :c:func:`PyCapsule_Import`. + + Internal usage only. Use :c:macro:`PyDateTime_IMPORT` instead. diff --git a/Doc/c-api/descriptor.rst b/Doc/c-api/descriptor.rst index 22c3b790cc3..313c534545a 100644 --- a/Doc/c-api/descriptor.rst +++ b/Doc/c-api/descriptor.rst @@ -21,12 +21,46 @@ found in the dictionary of type objects. .. c:function:: PyObject* PyDescr_NewMember(PyTypeObject *type, struct PyMemberDef *meth) +..
c:var:: PyTypeObject PyMemberDescr_Type + + The type object for member descriptor objects created from + :c:type:`PyMemberDef` structures. These descriptors expose fields of a + C struct as attributes on a type, and correspond + to :class:`types.MemberDescriptorType` objects in Python. + + + +.. c:var:: PyTypeObject PyGetSetDescr_Type + + The type object for get/set descriptor objects created from + :c:type:`PyGetSetDef` structures. These descriptors implement attributes + whose value is computed by C getter and setter functions, and are used + for many built-in type attributes. + + .. c:function:: PyObject* PyDescr_NewMethod(PyTypeObject *type, struct PyMethodDef *meth) +.. c:var:: PyTypeObject PyMethodDescr_Type + + The type object for method descriptor objects created from + :c:type:`PyMethodDef` structures. These descriptors expose C functions as + methods on a type, and correspond to :class:`types.MethodDescriptorType` + objects in Python. + + .. c:function:: PyObject* PyDescr_NewWrapper(PyTypeObject *type, struct wrapperbase *wrapper, void *wrapped) +.. c:var:: PyTypeObject PyWrapperDescr_Type + + The type object for wrapper descriptor objects created by + :c:func:`PyDescr_NewWrapper` and :c:func:`PyWrapper_New`. Wrapper + descriptors are used internally to expose special methods implemented + via wrapper structures, and appear in Python as + :class:`types.WrapperDescriptorType` objects. + + .. c:function:: PyObject* PyDescr_NewClassMethod(PyTypeObject *type, PyMethodDef *method) @@ -55,6 +89,14 @@ Built-in descriptors :class:`classmethod` in the Python layer. +.. c:var:: PyTypeObject PyClassMethodDescr_Type + + The type object for C-level class method descriptor objects. + This is the type of the descriptors created by + :c:func:`PyDescr_NewClassMethod` for class methods defined in + C extension types, and corresponds to + :class:`types.ClassMethodDescriptorType` in Python. + + .. c:function:: PyObject *PyClassMethod_New(PyObject *callable) Create a new :class:`classmethod` object wrapping *callable*. diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index ede1699cfeb..9c4428ced41 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -43,6 +43,17 @@ Dictionary Objects prevent modification of the dictionary for non-dynamic class types. +.. c:var:: PyTypeObject PyDictProxy_Type + + The type object for mapping proxy objects created by + :c:func:`PyDictProxy_New` and for the read-only ``__dict__`` attribute + of many built-in types. A :c:data:`PyDictProxy_Type` instance provides a + dynamic, read-only view of an underlying dictionary: changes to the + underlying dictionary are reflected in the proxy, but the proxy itself + does not support mutation operations. This corresponds to + :class:`types.MappingProxyType` in Python. + + .. c:function:: void PyDict_Clear(PyObject *p) Empty an existing dictionary of all key-value pairs. diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst index 3d331e6ec12..0ce173b4bfe 100644 --- a/Doc/c-api/extension-modules.rst +++ b/Doc/c-api/extension-modules.rst @@ -8,7 +8,8 @@ Defining extension modules A C extension for CPython is a shared library (for example, a ``.so`` file on Linux, ``.pyd`` DLL on Windows), which is loadable into the Python process (for example, it is compiled with compatible compiler settings), and which -exports an :ref:`initialization function `. +exports an :dfn:`export hook` function (or an +old-style :ref:`initialization function `).
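+
+As a quick sketch, for a hypothetical module named ``demo``, the two entry
+points have the following shapes (both are described in detail below)::
+
+   /* New-style export hook: returns an array of slots. */
+   PyMODEXPORT_FUNC PyModExport_demo(void);
+
+   /* Old-style initialization function: returns an initialized module
+      definition (or, with legacy single-phase initialization, the
+      module object itself). */
+   PyMODINIT_FUNC PyInit_demo(void);
+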
To be importable by default (that is, by :py:class:`importlib.machinery.ExtensionFileLoader`), @@ -23,25 +24,127 @@ and must be named after the module name plus an extension listed in One suitable tool is Setuptools, whose documentation can be found at https://setuptools.pypa.io/en/latest/setuptools.html. -Normally, the initialization function returns a module definition initialized -using :c:func:`PyModuleDef_Init`. -This allows splitting the creation process into several phases: +.. _extension-export-hook: +Extension export hook +..................... + +.. versionadded:: next + + Support for the :samp:`PyModExport_{}` export hook was added in Python + 3.15. The older way of defining modules is still available: consult either + the :ref:`extension-pyinit` section or earlier versions of this + documentation if you plan to support earlier Python versions. + +The export hook must be an exported function with the following signature: + +.. c:function:: PyModuleDef_Slot *PyModExport_modulename(void) + +For modules with ASCII-only names, the :ref:`export hook ` +must be named :samp:`PyModExport_{}`, +with ```` replaced by the module's name. + +For non-ASCII module names, the export hook must instead be named +:samp:`PyModExportU_{}` (note the ``U``), with ```` encoded using +Python's *punycode* encoding with hyphens replaced by underscores. In Python: + +.. code-block:: python + + def hook_name(name): + try: + suffix = b'_' + name.encode('ascii') + except UnicodeEncodeError: + suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') + return b'PyModExport' + suffix + +The export hook returns an array of :c:type:`PyModuleDef_Slot` entries, +terminated by an entry with a slot ID of ``0``. +These slots describe how the module should be created and initialized. + +This array must remain valid and constant until interpreter shutdown. +Typically, it should use ``static`` storage. +Prefer using the :c:macro:`Py_mod_create` and :c:macro:`Py_mod_exec` slots +for any dynamic behavior. + +The export hook may return ``NULL`` with an exception set to signal failure. + +It is recommended to define the export hook function using a helper macro: + +.. c:macro:: PyMODEXPORT_FUNC + + Declare an extension module export hook. + This macro: + + * specifies the :c:expr:`PyModuleDef_Slot*` return type, + * adds any special linkage declarations required by the platform, and + * for C++, declares the function as ``extern "C"``. + +For example, a module called ``spam`` would be defined like this:: + + PyABIInfo_VAR(abi_info); + + static PyModuleDef_Slot spam_slots[] = { + {Py_mod_abi, &abi_info}, + {Py_mod_name, "spam"}, + {Py_mod_init, spam_init_function}, + ... + {0, NULL}, + }; + + PyMODEXPORT_FUNC + PyModExport_spam(void) + { + return spam_slots; + } + +The export hook is typically the only non-\ ``static`` +item defined in the module's C source. + +The hook should be kept short -- ideally, one line as above. +If you do need to use Python C API in this function, it is recommended to call +``PyABIInfo_Check(&abi_info, "modulename")`` first to raise an exception, +rather than crash, in common cases of ABI mismatch. + + +.. note:: + + It is possible to export multiple modules from a single shared library by + defining multiple export hooks. + However, importing them requires a custom importer or suitably named + copies/links of the extension file, because Python's import machinery only + finds the function corresponding to the filename. 
+ See the `Multiple modules in one library `__ + section in :pep:`489` for details. + + +.. _multi-phase-initialization: + +Multi-phase initialization +.......................... + +The process of creating an extension module follows several phases: + +- Python finds and calls the export hook to get information on how to + create the module. - Before any substantial code is executed, Python can determine which capabilities the module supports, and it can adjust the environment or refuse loading an incompatible extension. -- By default, Python itself creates the module object -- that is, it does - the equivalent of :py:meth:`object.__new__` for classes. - It also sets initial attributes like :attr:`~module.__package__` and - :attr:`~module.__loader__`. -- Afterwards, the module object is initialized using extension-specific - code -- the equivalent of :py:meth:`~object.__init__` on classes. + Slots like :c:data:`Py_mod_abi`, :c:data:`Py_mod_gil` and + :c:data:`Py_mod_multiple_interpreters` influence this step. +- By default, Python itself then creates the module object -- that is, it does + the equivalent of calling :py:meth:`~object.__new__` when creating an object. + This step can be overridden using the :c:data:`Py_mod_create` slot. +- Python sets initial module attributes like :attr:`~module.__package__` and + :attr:`~module.__loader__`, and inserts the module object into + :py:attr:`sys.modules`. +- Afterwards, the module object is initialized in an extension-specific way + -- the equivalent of :py:meth:`~object.__init__` when creating an object, + or of executing top-level code in a Python-language module. + The behavior is specified using the :c:data:`Py_mod_exec` slot. This is called *multi-phase initialization* to distinguish it from the legacy -(but still supported) *single-phase initialization* scheme, -where the initialization function returns a fully constructed module. -See the :ref:`single-phase-initialization section below ` -for details. +(but still supported) :ref:`single-phase initialization `, +where an initialization function returns a fully constructed module. .. versionchanged:: 3.5 @@ -53,7 +156,7 @@ Multiple module instances By default, extension modules are not singletons. For example, if the :py:attr:`sys.modules` entry is removed and the module -is re-imported, a new module object is created, and typically populated with +is re-imported, a new module object is created and, typically, populated with fresh method and type objects. The old module is subject to normal garbage collection. This mirrors the behavior of pure-Python modules. @@ -83,36 +186,34 @@ A module may also be limited to the main interpreter using the :c:data:`Py_mod_multiple_interpreters` slot. -.. _extension-export-hook: +.. _extension-pyinit: -Initialization function -....................... +``PyInit`` function +................... -The initialization function defined by an extension module has the -following signature: +.. deprecated:: next + + This functionality is :term:`soft deprecated`. + It will not get new features, but there are no plans to remove it. + +Instead of :c:func:`PyModExport_modulename`, an extension module can define +an older-style :dfn:`initialization function` with the signature: .. c:function:: PyObject* PyInit_modulename(void) Its name should be :samp:`PyInit_{}`, with ```` replaced by the name of the module. 
+For non-ASCII module names, use :samp:`PyInitU_{}` instead, with +```` encoded in the same way as for the +:ref:`export hook ` (that is, using Punycode +with underscores). -For modules with ASCII-only names, the function must instead be named -:samp:`PyInit_{}`, with ```` replaced by the name of the module. -When using :ref:`multi-phase-initialization`, non-ASCII module names -are allowed. In this case, the initialization function name is -:samp:`PyInitU_{}`, with ```` encoded using Python's -*punycode* encoding with hyphens replaced by underscores. In Python: +If a module exports both :samp:`PyInit_{}` and +:samp:`PyModExport_{}`, the :samp:`PyInit_{}` function +is ignored. -.. code-block:: python - - def initfunc_name(name): - try: - suffix = b'_' + name.encode('ascii') - except UnicodeEncodeError: - suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') - return b'PyInit' + suffix - -It is recommended to define the initialization function using a helper macro: +Like with :c:macro:`PyMODEXPORT_FUNC`, it is recommended to define the +initialization function using a helper macro: .. c:macro:: PyMODINIT_FUNC @@ -123,6 +224,34 @@ It is recommended to define the initialization function using a helper macro: * adds any special linkage declarations required by the platform, and * for C++, declares the function as ``extern "C"``. + +Normally, the initialization function (``PyInit_modulename``) returns +a :c:type:`PyModuleDef` instance with non-``NULL`` +:c:member:`~PyModuleDef.m_slots`. This allows Python to use +:ref:`multi-phase initialization `. + +Before it is returned, the ``PyModuleDef`` instance must be initialized +using the following function: + +.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) + + Ensure a module definition is a properly initialized Python object that + correctly reports its type and a reference count. + + Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. + + Calling this function is required before returning a :c:type:`PyModuleDef` + from a module initialization function. + It should not be used in other contexts. + + Note that Python assumes that ``PyModuleDef`` structures are statically + allocated. + This function may return either a new reference or a borrowed one; + this reference must not be released. + + .. versionadded:: 3.5 + + For example, a module called ``spam`` would be defined like this:: static struct PyModuleDef spam_module = { @@ -137,59 +266,23 @@ For example, a module called ``spam`` would be defined like this:: return PyModuleDef_Init(&spam_module); } -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the `Multiple modules in one library `__ -section in :pep:`489` for details. - -The initialization function is typically the only non-\ ``static`` -item defined in the module's C source. - - -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -Normally, the :ref:`initialization function ` -(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with -non-``NULL`` :c:member:`~PyModuleDef.m_slots`. -Before it is returned, the ``PyModuleDef`` instance must be initialized -using the following function: - - -.. 
c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) + + Ensure a module definition is a properly initialized Python object that + correctly reports its type and a reference count. + + Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. + + Calling this function is required before returning a :c:type:`PyModuleDef` + from a module initialization function. + It should not be used in other contexts. + + Note that Python assumes that ``PyModuleDef`` structures are statically + allocated. + This function may return either a new reference or a borrowed one; + this reference must not be released. + + .. versionadded:: 3.5 + + For example, a module called ``spam`` would be defined like this:: static struct PyModuleDef spam_module = { @@ -137,59 +266,23 @@ For example, a module called ``spam`` would be defined like this:: return PyModuleDef_Init(&spam_module); } -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the `Multiple modules in one library `__ -section in :pep:`489` for details. - -The initialization function is typically the only non-\ ``static`` -item defined in the module's C source. - - -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -Normally, the :ref:`initialization function ` -(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with -non-``NULL`` :c:member:`~PyModuleDef.m_slots`. -Before it is returned, the ``PyModuleDef`` instance must be initialized -using the following function: - - -.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) - - Ensure a module definition is a properly initialized Python object that - correctly reports its type and a reference count. - - Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. - - Calling this function is required for :ref:`multi-phase-initialization`. - It should not be used in other contexts. - - Note that Python assumes that ``PyModuleDef`` structures are statically - allocated. - This function may return either a new reference or a borrowed one; - this reference must not be released. - - .. versionadded:: 3.5 - .. _single-phase-initialization: Legacy single-phase initialization -.................................. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. attention:: - Single-phase initialization is a legacy mechanism to initialize extension +.. deprecated:: next + + Single-phase initialization is :term:`soft deprecated`. + It is a legacy mechanism to initialize extension modules, with known drawbacks and design flaws. Extension module authors are encouraged to use multi-phase initialization instead. -In single-phase initialization, the -:ref:`initialization function ` (``PyInit_modulename``) + However, there are no plans to remove support for it. + +In single-phase initialization, the old-style +:ref:`initialization function ` (``PyInit_modulename``) should create, populate and return a module object. This is typically done using :c:func:`PyModule_Create` and functions like :c:func:`PyModule_AddObjectRef`. @@ -242,6 +335,8 @@ in the following ways: * Single-phase modules support module lookup functions like :c:func:`PyState_FindModule`. +* The module's :c:member:`PyModuleDef.m_slots` must be ``NULL``. + .. [#testsinglephase] ``_testsinglephase`` is an internal module used in CPython's self-test suite; your installation may or may not include it. diff --git a/Doc/c-api/gen.rst b/Doc/c-api/gen.rst index 0eb5922f6da..44f3bdbf959 100644 --- a/Doc/c-api/gen.rst +++ b/Doc/c-api/gen.rst @@ -44,3 +44,41 @@ than explicitly calling :c:func:`PyGen_New` or :c:func:`PyGen_NewWithQualName`. with ``__name__`` and ``__qualname__`` set to *name* and *qualname*. A reference to *frame* is stolen by this function. The *frame* argument must not be ``NULL``. + +.. c:function:: PyCodeObject* PyGen_GetCode(PyGenObject *gen) + + Return a new :term:`strong reference` to the code object wrapped by *gen*. + This function always succeeds. + + +Asynchronous Generator Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. seealso:: + :pep:`525` + +.. c:var:: PyTypeObject PyAsyncGen_Type + + The type object corresponding to asynchronous generator objects. This is + available as :class:`types.AsyncGeneratorType` in the Python layer. + + .. versionadded:: 3.6 + +.. c:function:: PyObject *PyAsyncGen_New(PyFrameObject *frame, PyObject *name, PyObject *qualname) + + Create a new asynchronous generator wrapping *frame*, with ``__name__`` and + ``__qualname__`` set to *name* and *qualname*. *frame* is stolen by this + function and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to the + new asynchronous generator. On failure, this function returns ``NULL`` + with an exception set. + + .. versionadded:: 3.6 + +.. c:function:: int PyAsyncGen_CheckExact(PyObject *op) + + Return true if *op* is an asynchronous generator object, false otherwise. + This function always succeeds. + + ..
versionadded:: 3.6 diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 971c6a69e5d..1786ac6b503 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -314,6 +314,13 @@ Importing Modules initialization. +.. c:var:: struct _inittab *PyImport_Inittab + + The table of built-in modules used by Python initialization. Do not use this directly; + use :c:func:`PyImport_AppendInittab` and :c:func:`PyImport_ExtendInittab` + instead. + + .. c:function:: PyObject* PyImport_ImportModuleAttr(PyObject *mod_name, PyObject *attr_name) Import the module *mod_name* and get its attribute *attr_name*. @@ -346,7 +353,7 @@ Importing Modules the same as for :c:func:`PyImport_AppendInittab`. On success, create and return a module object. - This module will not be initialized; call :c:func:`!PyModule_Exec` + This module will not be initialized; call :c:func:`PyModule_Exec` to initialize it. (Custom importers should do this in their :py:meth:`~importlib.abc.Loader.exec_module` method.) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 3cac2c8b213..7411644f9e1 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1717,7 +1717,8 @@ function. You can create and destroy them using the following functions: Only C-level static and global variables are shared between these module objects. - * For modules using single-phase initialization, + * For modules using legacy + :ref:`single-phase initialization `, e.g. :c:func:`PyModule_Create`, the first time a particular extension is imported, it is initialized normally, and a (shallow) copy of its module's dictionary is squirreled away. @@ -1891,6 +1892,25 @@ pointer and a void pointer argument. This function now always schedules *func* to be run in the main interpreter. + +.. c:function:: int Py_MakePendingCalls(void) + + Execute all pending calls. This is usually executed automatically by the + interpreter. + + This function returns ``0`` on success, and returns ``-1`` with an exception + set on failure. + + If this is not called in the main thread of the main + interpreter, this function does nothing and returns ``0``. + The caller must hold an :term:`attached thread state`. + + .. versionadded:: 3.1 + + .. versionchanged:: 3.12 + This function only runs pending calls in the main interpreter. + + .. _profiling: Profiling and Tracing @@ -2520,3 +2540,220 @@ code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. In the default build, this macro expands to ``}``. .. versionadded:: 3.13 + + +Legacy Locking APIs +------------------- + +These APIs are obsolete since Python 3.13 with the introduction of +:c:type:`PyMutex`. + +.. versionchanged:: 3.15 + These APIs are now a simple wrapper around ``PyMutex``. + + +.. c:type:: PyThread_type_lock + + A pointer to a mutual exclusion lock. + + +.. c:type:: PyLockStatus + + The result of acquiring a lock with a timeout. + + .. c:namespace:: NULL + + .. c:enumerator:: PY_LOCK_FAILURE + + Failed to acquire the lock. + + .. c:enumerator:: PY_LOCK_ACQUIRED + + The lock was successfully acquired. + + .. c:enumerator:: PY_LOCK_INTR + + The lock was interrupted by a signal. + + +.. c:function:: PyThread_type_lock PyThread_allocate_lock(void) + + Allocate a new lock. + + On success, this function returns a lock; on failure, this + function returns ``0`` without an exception set. + + The caller does not need to hold an :term:`attached thread state`. + + .. versionchanged:: 3.15 + This function now always uses :c:type:`PyMutex`. 
In prior versions, this + would use a lock provided by the operating system. + + +.. c:function:: void PyThread_free_lock(PyThread_type_lock lock) + + Destroy *lock*. The lock should not be held by any thread when calling + this. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock lock, long long microseconds, int intr_flag) + + Acquire *lock* with a timeout. + + This will wait for *microseconds* microseconds to acquire the lock. If the + timeout expires, this function returns :c:enumerator:`PY_LOCK_FAILURE`. + If *microseconds* is ``-1``, this will wait indefinitely until the lock has + been released. + + If *intr_flag* is ``1``, acquiring the lock may be interrupted by a signal, + in which case this function returns :c:enumerator:`PY_LOCK_INTR`. Upon + interruption, it's generally expected that the caller makes a call to + :c:func:`Py_MakePendingCalls` to propagate an exception to Python code. + + If the lock is successfully acquired, this function returns + :c:enumerator:`PY_LOCK_ACQUIRED`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) + + Acquire *lock*. + + If *waitflag* is ``1`` and another thread currently holds the lock, this + function will wait until the lock can be acquired and will always return + ``1``. + + If *waitflag* is ``0`` and another thread holds the lock, this function will + not wait and instead return ``0``. If the lock is not held by any other + thread, then this function will acquire it and return ``1``. + + Unlike :c:func:`PyThread_acquire_lock_timed`, acquiring the lock cannot be + interrupted by a signal. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: void PyThread_release_lock(PyThread_type_lock lock) + + Release *lock*. If *lock* is not held, then this function issues a + fatal error. + + The caller does not need to hold an :term:`attached thread state`. + + +Operating System Thread APIs +============================ + +.. c:macro:: PYTHREAD_INVALID_THREAD_ID + + Sentinel value for an invalid thread ID. + + This is currently equivalent to ``(unsigned long)-1``. + + +.. c:function:: unsigned long PyThread_start_new_thread(void (*func)(void *), void *arg) + + Start function *func* in a new thread with argument *arg*. + The resulting thread is not intended to be joined. + + *func* must not be ``NULL``, but *arg* may be ``NULL``. + + On success, this function returns the identifier of the new thread; on failure, + this returns :c:macro:`PYTHREAD_INVALID_THREAD_ID`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: unsigned long PyThread_get_thread_ident(void) + + Return the identifier of the current thread, which will never be zero. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_ident` + + +.. c:function:: PyObject *PyThread_GetInfo(void) + + Get general information about the current thread in the form of a + :ref:`struct sequence ` object. This information is + accessible as :py:attr:`sys.thread_info` in Python. + + On success, this returns a new :term:`strong reference` to the thread + information; on failure, this returns ``NULL`` with an exception set. + + The caller must hold an :term:`attached thread state`. + + +..
c:macro:: PY_HAVE_THREAD_NATIVE_ID + + This macro is defined when the system supports native thread IDs. + + +.. c:function:: unsigned long PyThread_get_thread_native_id(void) + + Get the native identifier of the current thread as it was assigned by the operating + system's kernel, which will never be less than zero. + + This function is only available when :c:macro:`PY_HAVE_THREAD_NATIVE_ID` is + defined. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_native_id` + + +.. c:function:: void PyThread_exit_thread(void) + + Terminate the current thread. This function is generally considered unsafe + and should be avoided. It is kept solely for backwards compatibility. + + This function is only safe to call if all functions in the full call + stack are written to safely allow it. + + .. warning:: + + If the current system uses POSIX threads (also known as "pthreads"), + this calls :manpage:`pthread_exit(3)`, which attempts to unwind the stack + and call C++ destructors on some libc implementations. However, if a + ``noexcept`` function is reached, it may terminate the process. Other + systems, such as macOS, do perform stack unwinding. + + On Windows, this function calls ``_endthreadex()``, which kills the thread + without calling C++ destructors. + + In any case, there is a risk of corruption on the thread's stack. + + .. deprecated:: 3.14 + + +.. c:function:: void PyThread_init_thread(void) + + Initialize ``PyThread*`` APIs. Python executes this function automatically, + so there's little need to call it from an extension module. + + +.. c:function:: int PyThread_set_stacksize(size_t size) + + Set the stack size used when creating new threads to *size* bytes. + + This function returns ``0`` on success, ``-1`` if *size* is invalid, or + ``-2`` if the system does not support changing the stack size. This function + does not set exceptions. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: size_t PyThread_get_stacksize(void) + + Return the stack size used when creating new threads, in bytes, or ``0`` if the system's + default stack size is in use. + + The caller does not need to hold an :term:`attached thread state`. diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index bace21b7981..bb94bcb86a7 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -171,6 +171,17 @@ complete listing. Like ``getenv(s)``, but returns ``NULL`` if :option:`-E` was passed on the command line (see :c:member:`PyConfig.use_environment`). +.. c:macro:: Py_LOCAL(type) + + Declare a function returning the specified *type* using a fast-calling + qualifier for functions that are local to the current file. + Semantically, this is equivalent to ``static type``. + +.. c:macro:: Py_LOCAL_INLINE(type) + + Equivalent to :c:macro:`Py_LOCAL` but additionally requests the function + be inlined. + .. c:macro:: Py_MAX(x, y) Return the maximum value between ``x`` and ``y``. @@ -311,6 +322,19 @@ complete listing. PyDoc_VAR(python_doc) = PyDoc_STR("A genus of constricting snakes in the Pythonidae family native " "to the tropics and subtropics of the Eastern Hemisphere."); +.. c:macro:: Py_ARRAY_LENGTH(array) + + Compute the length of a statically allocated C array at compile time. + + The *array* argument must be a C array with a size known at compile time.
+ Passing an array with an unknown size, such as a heap-allocated array, + will result in a compilation error on some compilers, or otherwise produce + incorrect results. + + This is roughly equivalent to:: + + sizeof(array) / sizeof((array)[0]) + .. _api-objects: diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index 1994a3c7d01..a12f6331c85 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -3,11 +3,10 @@ .. _moduleobjects: Module Objects --------------- +============== .. index:: pair: object; module - .. c:var:: PyTypeObject PyModule_Type .. index:: single: ModuleType (in module types) @@ -97,13 +96,6 @@ Module Objects Note that Python code may rename a module by setting its :py:attr:`~module.__name__` attribute. -.. c:function:: void* PyModule_GetState(PyObject *module) - - Return the "state" of the module, that is, a pointer to the block of memory - allocated at module creation time, or ``NULL``. See - :c:member:`PyModuleDef.m_size`. - - .. c:function:: PyModuleDef* PyModule_GetDef(PyObject *module) Return a pointer to the :c:type:`PyModuleDef` struct from which the module was @@ -141,210 +133,108 @@ Module Objects unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead. -.. _pymoduledef: +.. _pymoduledef_slot: -Module definitions ------------------- +Module definition +----------------- -The functions in the previous section work on any module object, including -modules imported from Python code. +Modules created using the C API are typically defined using an +array of :dfn:`slots`. +The slots provide a "description" of how a module should be created. -Modules defined using the C API typically use a *module definition*, -:c:type:`PyModuleDef` -- a statically allocated, constant “description" of -how a module should be created. +.. versionchanged:: next -The definition is usually used to define an extension's “main” module object -(see :ref:`extension-modules` for details). -It is also used to -:ref:`create extension modules dynamically `. + Previously, a :c:type:`PyModuleDef` struct was necessary to define modules. + The older way of defining modules is still available: consult either the + :ref:`pymoduledef` section or earlier versions of this documentation + if you plan to support earlier Python versions. -Unlike :c:func:`PyModule_New`, the definition allows management of -*module state* -- a piece of memory that is allocated and cleared together -with the module object. -Unlike the module's Python attributes, Python code cannot replace or delete -data stored in module state. +The slots array is usually used to define an extension module's “main” +module object (see :ref:`extension-modules` for details). +It can also be used to +:ref:`create extension modules dynamically `. -.. c:type:: PyModuleDef +Unless specified otherwise, the same slot ID may not be repeated +in an array of slots. - The module definition struct, which holds all information needed to create - a module object. - This structure must be statically allocated (or be otherwise guaranteed - to be valid while any modules created from it exist). - Usually, there is only one variable of this type for each extension module. - - .. c:member:: PyModuleDef_Base m_base - - Always initialize this member to :c:macro:`PyModuleDef_HEAD_INIT`. - - .. c:member:: const char *m_name - - Name for the new module. - - .. c:member:: const char *m_doc - - Docstring for the module; usually a docstring variable created with - :c:macro:`PyDoc_STRVAR` is used. - - .. 
c:member:: Py_ssize_t m_size - - Module state may be kept in a per-module memory area that can be - retrieved with :c:func:`PyModule_GetState`, rather than in static globals. - This makes modules safe for use in multiple sub-interpreters. - - This memory area is allocated based on *m_size* on module creation, - and freed when the module object is deallocated, after the - :c:member:`~PyModuleDef.m_free` function has been called, if present. - - Setting it to a non-negative value means that the module can be - re-initialized and specifies the additional amount of memory it requires - for its state. - - Setting ``m_size`` to ``-1`` means that the module does not support - sub-interpreters, because it has global state. - Negative ``m_size`` is only allowed when using - :ref:`legacy single-phase initialization ` - or when :ref:`creating modules dynamically `. - - See :PEP:`3121` for more details. - - .. c:member:: PyMethodDef* m_methods - - A pointer to a table of module-level functions, described by - :c:type:`PyMethodDef` values. Can be ``NULL`` if no functions are present. - - .. c:member:: PyModuleDef_Slot* m_slots - - An array of slot definitions for multi-phase initialization, terminated by - a ``{0, NULL}`` entry. - When using legacy single-phase initialization, *m_slots* must be ``NULL``. - - .. versionchanged:: 3.5 - - Prior to version 3.5, this member was always set to ``NULL``, - and was defined as: - - .. c:member:: inquiry m_reload - - .. c:member:: traverseproc m_traverse - - A traversal function to call during GC traversal of the module object, or - ``NULL`` if not needed. - - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. - - .. versionchanged:: 3.9 - No longer called before the module state is allocated. - - .. c:member:: inquiry m_clear - - A clear function to call during GC clearing of the module object, or - ``NULL`` if not needed. - - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. - - Like :c:member:`PyTypeObject.tp_clear`, this function is not *always* - called before a module is deallocated. For example, when reference - counting is enough to determine that an object is no longer used, - the cyclic garbage collector is not involved and - :c:member:`~PyModuleDef.m_free` is called directly. - - .. versionchanged:: 3.9 - No longer called before the module state is allocated. - - .. c:member:: freefunc m_free - - A function to call during deallocation of the module object, or ``NULL`` - if not needed. - - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). 
More
-   precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater
-   than 0 and the module state (as returned by :c:func:`PyModule_GetState`)
-   is ``NULL``.
-
-   .. versionchanged:: 3.9
-      No longer called before the module state is allocated.
-
-
-Module slots
-............

 .. c:type:: PyModuleDef_Slot

    .. c:member:: int slot

-      A slot ID, chosen from the available values explained below.
+      A slot ID, chosen from the available ``Py_mod_*`` values explained below.
+
+      An ID of 0 marks the end of a :c:type:`!PyModuleDef_Slot` array.

    .. c:member:: void* value

       Value of the slot, whose meaning depends on the slot ID.

+      The value may not be ``NULL``.
+      To leave a slot out, omit the :c:type:`PyModuleDef_Slot` entry entirely.
+
    .. versionadded:: 3.5

-The available slot types are:

-.. c:macro:: Py_mod_create
+Metadata slots
+..............

-   Specifies a function that is called to create the module object itself.
-   The *value* pointer of this slot must point to a function of the signature:
+.. c:macro:: Py_mod_name

-   .. c:function:: PyObject* create_module(PyObject *spec, PyModuleDef *def)
-      :no-index-entry:
-      :no-contents-entry:
+   :c:type:`Slot ID ` for the name of the new module,
+   as a NUL-terminated UTF-8 encoded ``const char *``.

-   The function receives a :py:class:`~importlib.machinery.ModuleSpec`
-   instance, as defined in :PEP:`451`, and the module definition.
-   It should return a new module object, or set an error
-   and return ``NULL``.
+   Note that modules are typically created using a
+   :py:class:`~importlib.machinery.ModuleSpec`, and when they are, the
+   name from the spec will be used instead of :c:data:`!Py_mod_name`.
+   However, it is still recommended to include this slot for introspection
+   and debugging purposes.

-   This function should be kept minimal. In particular, it should not
-   call arbitrary Python code, as trying to import the same module again may
-   result in an infinite loop.
+   .. versionadded:: next

-   Multiple ``Py_mod_create`` slots may not be specified in one module
-   definition.
+      Use :c:member:`PyModuleDef.m_name` instead to support previous versions.

-   If ``Py_mod_create`` is not specified, the import machinery will create
-   a normal module object using :c:func:`PyModule_New`. The name is taken from
-   *spec*, not the definition, to allow extension modules to dynamically adjust
-   to their place in the module hierarchy and be imported under different
-   names through symlinks, all while sharing a single module definition.
+.. c:macro:: Py_mod_doc

-   There is no requirement for the returned object to be an instance of
-   :c:type:`PyModule_Type`. Any type can be used, as long as it supports
-   setting and getting import-related attributes.
-   However, only ``PyModule_Type`` instances may be returned if the
-   ``PyModuleDef`` has non-``NULL`` ``m_traverse``, ``m_clear``,
-   ``m_free``; non-zero ``m_size``; or slots other than ``Py_mod_create``.
+   :c:type:`Slot ID ` for the docstring of the new
+   module, as a NUL-terminated UTF-8 encoded ``const char *``.

-.. c:macro:: Py_mod_exec
+   Usually it is set to a variable created with :c:macro:`PyDoc_STRVAR`.

-   Specifies a function that is called to *execute* the module.
-   This is equivalent to executing the code of a Python module: typically,
-   this function adds classes and constants to the module.
-   The signature of the function is:
+   .. versionadded:: next

-   .. c:function:: int exec_module(PyObject* module)
-      :no-index-entry:
-      :no-contents-entry:
+      Use :c:member:`PyModuleDef.m_doc` instead to support previous versions.
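+
+For example, the metadata slots of a hypothetical ``spam`` module could be
+set up as in the following sketch (the ``spam`` names are illustrative):
+
+.. code-block:: c
+
+   PyDoc_STRVAR(spam_doc, "An example extension module.");
+
+   static PyModuleDef_Slot spam_slots[] = {
+       {Py_mod_name, "spam"},
+       {Py_mod_doc, (void *)spam_doc},
+       {0, NULL}  /* sentinel: a slot ID of 0 ends the array */
+   };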
- If multiple ``Py_mod_exec`` slots are specified, they are processed in the - order they appear in the *m_slots* array. + +Feature slots +............. + +.. c:macro:: Py_mod_abi + + :c:type:`Slot ID ` whose value points to + a :c:struct:`PyABIInfo` structure describing the ABI that + the extension is using. + + A suitable :c:struct:`!PyABIInfo` variable can be defined using the + :c:macro:`PyABIInfo_VAR` macro, as in: + + .. code-block:: c + + PyABIInfo_VAR(abi_info); + + static PyModuleDef_Slot mymodule_slots[] = { + {Py_mod_abi, &abi_info}, + ... + }; + + When creating a module, Python checks the value of this slot + using :c:func:`PyABIInfo_Check`. + + .. versionadded:: 3.15 .. c:macro:: Py_mod_multiple_interpreters - Specifies one of the following values: + :c:type:`Slot ID ` whose value is one of: .. c:namespace:: NULL @@ -367,9 +257,6 @@ The available slot types are: This slot determines whether or not importing this module in a subinterpreter will fail. - Multiple ``Py_mod_multiple_interpreters`` slots may not be specified - in one module definition. - If ``Py_mod_multiple_interpreters`` is not specified, the import machinery defaults to ``Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED``. @@ -377,7 +264,7 @@ The available slot types are: .. c:macro:: Py_mod_gil - Specifies one of the following values: + :c:type:`Slot ID ` whose value is one of: .. c:namespace:: NULL @@ -395,45 +282,482 @@ The available slot types are: this module will cause the GIL to be automatically enabled. See :ref:`whatsnew313-free-threaded-cpython` for more detail. - Multiple ``Py_mod_gil`` slots may not be specified in one module definition. - If ``Py_mod_gil`` is not specified, the import machinery defaults to ``Py_MOD_GIL_USED``. .. versionadded:: 3.13 -.. c:macro:: Py_mod_abi - A pointer to a :c:struct:`PyABIInfo` structure that describes the ABI that - the extension is using. +Creation and initialization slots +................................. - When the module is loaded, the :c:struct:`!PyABIInfo` in this slot is checked - using :c:func:`PyABIInfo_Check`. +.. c:macro:: Py_mod_create - A suitable :c:struct:`!PyABIInfo` variable can be defined using the - :c:macro:`PyABIInfo_VAR` macro, as in: + :c:type:`Slot ID ` for a function that creates + the module object itself. + The function must have the signature: - .. code-block:: c + .. c:function:: PyObject* create_module(PyObject *spec, PyModuleDef *def) + :no-index-entry: + :no-contents-entry: - PyABIInfo_VAR(abi_info); + The function will be called with: - static PyModuleDef_Slot mymodule_slots[] = { - {Py_mod_abi, &abi_info}, - ... - }; + - *spec*: a ``ModuleSpec``-like object, meaning that any attributes defined + for :py:class:`importlib.machinery.ModuleSpec` have matching semantics. + However, any of the attributes may be missing. + - *def*: ``NULL``, or the module definition if the module is created from one. - .. versionadded:: 3.15 + The function should return a new module object, or set an error + and return ``NULL``. + + This function should be kept minimal. In particular, it should not + call arbitrary Python code, as trying to import the same module again may + result in an infinite loop. + + If ``Py_mod_create`` is not specified, the import machinery will create + a normal module object using :c:func:`PyModule_New`. 
The name is taken from
+   *spec*, not the definition, to allow extension modules to dynamically adjust
+   to their place in the module hierarchy and be imported under different
+   names through symlinks, all while sharing a single module definition.
+
+   There is no requirement for the returned object to be an instance of
+   :c:type:`PyModule_Type`.
+   However, some slots may only be used with
+   :c:type:`!PyModule_Type` instances; in particular:
+
+   - :c:macro:`Py_mod_exec`,
+   - :ref:`module state slots ` (``Py_mod_state_*``),
+   - :c:macro:`Py_mod_token`.
+
+   .. versionadded:: 3.5
+
+   .. versionchanged:: next
+
+      The *spec* argument may be a ``ModuleSpec``-like object, rather than
+      a true :py:class:`~importlib.machinery.ModuleSpec` instance.
+      Note that previous versions of CPython did not enforce this.
+
+      The *def* argument may now be ``NULL``, since modules are not necessarily
+      made from definitions.
+
+.. c:macro:: Py_mod_exec
+
+   :c:type:`Slot ID ` for a function that will
+   :dfn:`execute`, or initialize, the module.
+   This function does the equivalent of executing the code of a Python module:
+   typically, it adds classes and constants to the module.
+   The signature of the function is:
+
+   .. c:function:: int exec_module(PyObject* module)
+      :no-index-entry:
+      :no-contents-entry:
+
+   See the :ref:`capi-module-support-functions` section for some useful
+   functions to call.
+
+   For backwards compatibility, the :c:member:`PyModuleDef.m_slots` array may
+   contain multiple :c:macro:`!Py_mod_exec` slots; these are processed in the
+   order they appear in the array.
+   Elsewhere (that is, in arguments to :c:func:`PyModule_FromSlotsAndSpec`
+   and in return values of ``PyModExport_*`` export hooks), repeating the slot
+   is not allowed.
+
+   .. versionadded:: 3.5
+
+   .. versionchanged:: next
+
+      Repeated ``Py_mod_exec`` slots are disallowed, except in
+      :c:member:`PyModuleDef.m_slots`.
+
+.. c:macro:: Py_mod_methods
+
+   :c:type:`Slot ID ` for a table of module-level
+   functions, as an array of :c:type:`PyMethodDef` values suitable as the
+   *functions* argument to :c:func:`PyModule_AddFunctions`.
+
+   As with other slot IDs, a slots array may only contain one
+   :c:macro:`!Py_mod_methods` entry.
+   To add functions from multiple :c:type:`PyMethodDef` arrays, call
+   :c:func:`PyModule_AddFunctions` in the :c:macro:`Py_mod_exec` function.
+
+   The table must be statically allocated (or otherwise guaranteed to outlive
+   the module object).
+
+   .. versionadded:: next
+
+      Use :c:member:`PyModuleDef.m_methods` instead to support previous versions.
+
+.. _ext-module-state:
+
+Module state
+------------
+
+Extension modules can have *module state* -- a
+piece of memory that is allocated on module creation,
+and freed when the module object is deallocated.
+The module state is specified using :ref:`dedicated slots `.
+
+A typical use of module state is storing an exception type -- or indeed *any*
+type object defined by the module -- so that functions and methods defined
+by the module can retrieve it without relying on static globals.
+
+Unlike the module's Python attributes, Python code cannot replace or delete
+data stored in module state.
+
+Keeping per-module information in attributes and module state, rather than in
+static globals, makes module objects *isolated* and safer for use in
+multiple sub-interpreters.
+It also helps Python do an orderly clean-up when it shuts down.
+
+Extensions that keep references to Python objects as part of module state must
+implement :c:macro:`Py_mod_state_traverse` and :c:macro:`Py_mod_state_clear`
+functions to avoid reference leaks.
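+
+As a sketch, module state holding a single exception type (for a hypothetical
+``spam`` extension; all names here are illustrative) could be declared with
+matching traverse and clear functions like this:
+
+.. code-block:: c
+
+   typedef struct {
+       PyObject *error_type;  /* an exception type kept in module state */
+   } spam_state;
+
+   static int
+   spam_traverse(PyObject *module, visitproc visit, void *arg)
+   {
+       spam_state *state = PyModule_GetState(module);
+       if (state) {
+           Py_VISIT(state->error_type);
+       }
+       return 0;
+   }
+
+   static int
+   spam_clear(PyObject *module)
+   {
+       spam_state *state = PyModule_GetState(module);
+       if (state) {
+           Py_CLEAR(state->error_type);
+       }
+       return 0;
+   }
+
+These functions would then be referenced from the
+:c:macro:`Py_mod_state_traverse` and :c:macro:`Py_mod_state_clear` slots,
+with ``sizeof(spam_state)`` given as the :c:macro:`Py_mod_state_size`.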
+
+To retrieve the state from a given module, use the following functions:
+
+.. c:function:: void* PyModule_GetState(PyObject *module)
+
+   Return the "state" of the module, that is, a pointer to the block of memory
+   allocated at module creation time, or ``NULL``. See
+   :c:macro:`Py_mod_state_size`.
+
+   On error, return ``NULL`` with an exception set.
+   Use :c:func:`PyErr_Occurred` to tell this case apart from missing
+   module state.

-.. _moduledef-dynamic:
+
+.. c:function:: int PyModule_GetStateSize(PyObject *module, Py_ssize_t *result)
+
+   Set *\*result* to the size of the module's state, as specified using
+   :c:macro:`Py_mod_state_size` (or :c:member:`PyModuleDef.m_size`),
+   and return 0.
+
+   On error, set *\*result* to -1, and return -1 with an exception set.
+
+   .. versionadded:: next
+
+
+
+.. _ext-module-state-slots:
+
+Slots for defining module state
+...............................
+
+The following :c:member:`PyModuleDef_Slot.slot` IDs are available for
+defining the module state.
+
+.. c:macro:: Py_mod_state_size
+
+   :c:type:`Slot ID ` for the size of the module state,
+   in bytes.
+
+   Setting this to a non-negative value means that the module can be
+   re-initialized and specifies the additional amount of memory it requires
+   for its state.
+
+   See :PEP:`3121` for more details.
+
+   Use :c:func:`PyModule_GetStateSize` to retrieve the size of a given module.
+
+   .. versionadded:: next
+
+      Use :c:member:`PyModuleDef.m_size` instead to support previous versions.
+
+.. c:macro:: Py_mod_state_traverse
+
+   :c:type:`Slot ID ` for a traversal function to call
+   during GC traversal of the module object.
+
+   The signature of the function, and the meaning of its arguments,
+   are the same as for :c:member:`PyTypeObject.tp_traverse`:
+
+   .. c:function:: int traverse_module_state(PyObject *module, visitproc visit, void *arg)
+      :no-index-entry:
+      :no-contents-entry:
+
+   This function is not called if the module state was requested but is not
+   allocated yet. This is the case immediately after the module is created
+   and before the module is executed (:c:data:`Py_mod_exec` function). More
+   precisely, this function is not called if the state size
+   (:c:data:`Py_mod_state_size`) is greater than 0 and the module state
+   (as returned by :c:func:`PyModule_GetState`) is ``NULL``.
+
+   .. versionadded:: next
+
+      Use :c:member:`PyModuleDef.m_traverse` instead to support previous versions.
+
+.. c:macro:: Py_mod_state_clear
+
+   :c:type:`Slot ID ` for a clear function to call
+   during GC clearing of the module object.
+
+   The signature of the function is:
+
+   .. c:function:: int clear_module_state(PyObject* module)
+      :no-index-entry:
+      :no-contents-entry:
+
+   This function is not called if the module state was requested but is not
+   allocated yet. This is the case immediately after the module is created
+   and before the module is executed (:c:data:`Py_mod_exec` function). More
+   precisely, this function is not called if the state size
+   (:c:data:`Py_mod_state_size`) is greater than 0 and the module state
+   (as returned by :c:func:`PyModule_GetState`) is ``NULL``.
+
+   Like :c:member:`PyTypeObject.tp_clear`, this function is not *always*
+   called before a module is deallocated. For example, when reference
+   counting is enough to determine that an object is no longer used,
+   the cyclic garbage collector is not involved and
+   the :c:macro:`Py_mod_state_free` function is called directly.
+
+   .. versionadded:: next
+
+      Use :c:member:`PyModuleDef.m_clear` instead to support previous versions.
+
+.. c:macro:: Py_mod_state_free
+
+   :c:type:`Slot ID ` for a function to call during
+   deallocation of the module object.
+
+   The signature of the function is:
+
+   .. c:function:: int free_module_state(PyObject* module)
+      :no-index-entry:
+      :no-contents-entry:
+
+   This function is not called if the module state was requested but is not
+   allocated yet. This is the case immediately after the module is created
+   and before the module is executed (:c:data:`Py_mod_exec` function). More
+   precisely, this function is not called if the state size
+   (:c:data:`Py_mod_state_size`) is greater than 0 and the module state
+   (as returned by :c:func:`PyModule_GetState`) is ``NULL``.
+
+   .. versionadded:: next
+
+      Use :c:member:`PyModuleDef.m_free` instead to support previous versions.
+
+
+.. _ext-module-token:
+
+Module token
+............
+
+Each module may have an associated *token*: a pointer-sized value intended to
+identify the module state's memory layout.
+This means that if you have a module object, but you are not sure if it
+“belongs” to your extension, you can check using code like this:
+
+.. code-block:: c
+
+   PyObject *module = ...;  /* the module object to check */
+
+   void *module_token;
+   if (PyModule_GetToken(module, &module_token) < 0) {
+       return NULL;
+   }
+   if (module_token != your_token) {
+       PyErr_SetString(PyExc_ValueError, "unexpected module");
+       return NULL;
+   }
+
+   // This module's state has the expected memory layout; it's safe to cast
+   struct my_state *state = (struct my_state*)PyModule_GetState(module);
+
+A module's token -- and the *your_token* value to use in the above code -- is:
+
+- For modules created with :c:type:`PyModuleDef`: the address of that
+  :c:type:`PyModuleDef`;
+- For modules defined with the :c:macro:`Py_mod_token` slot: the value
+  of that slot;
+- For modules created from a ``PyModExport_*``
+  :ref:`export hook `: the slots array that the export
+  hook returned (unless overridden with :c:macro:`Py_mod_token`).
+
+.. c:macro:: Py_mod_token
+
+   :c:type:`Slot ID ` for the module token.
+
+   If you use this slot to set the module token (rather than rely on the
+   default), you must ensure that:
+
+   * The pointer outlives the module, so it's not reused for something else
+     while the module exists.
+   * It "belongs" to the extension that defines the module, so it will not
+     clash with other extensions.
+   * If the token points to a :c:type:`PyModuleDef` struct, the module should
+     behave as if it was created from that :c:type:`PyModuleDef`.
+     In particular, the module state must have matching layout and semantics.
+
+   Modules created from :c:type:`PyModuleDef` always use the address of
+   the :c:type:`PyModuleDef` as the token.
+   This means that :c:macro:`!Py_mod_token` cannot be used in
+   :c:member:`PyModuleDef.m_slots`.
+
+   .. versionadded:: next
+
+.. c:function:: int PyModule_GetToken(PyObject *module, void** result)
+
+   Set *\*result* to the module's token and return 0.
+
+   On error, set *\*result* to ``NULL``, and return -1 with an exception set.
+
+   .. versionadded:: next
+
+See also :c:func:`PyType_GetModuleByToken`.
+
+
+.. _module-from-slots:
+
+Creating extension modules dynamically
+--------------------------------------
+
+The following functions may be used to create an extension module dynamically,
+rather than from an extension's :ref:`export hook `.
+
+.. c:function:: PyObject *PyModule_FromSlotsAndSpec(const PyModuleDef_Slot *slots, PyObject *spec)
+
+   Create a new module object, given an array of :ref:`slots `
+   and the :py:class:`~importlib.machinery.ModuleSpec` *spec*.
+
+   The *slots* argument must point to an array of :c:type:`PyModuleDef_Slot`
+   structures, terminated by an entry with a slot ID of 0
+   (typically written as ``{0}`` or ``{0, NULL}`` in C).
+   The *slots* argument may not be ``NULL``.
+
+   The *spec* argument may be any ``ModuleSpec``-like object, as described
+   in the :c:macro:`Py_mod_create` documentation.
+   Currently, the *spec* must have a ``name`` attribute.
+
+   On success, return the new module.
+   On error, return ``NULL`` with an exception set.
+
+   Note that this does not process the module's execution slot
+   (:c:data:`Py_mod_exec`).
+   Both :c:func:`!PyModule_FromSlotsAndSpec` and :c:func:`PyModule_Exec`
+   must be called to fully initialize a module.
+   (See also :ref:`multi-phase-initialization`.)
+
+   The *slots* array only needs to be valid for the duration of the
+   :c:func:`!PyModule_FromSlotsAndSpec` call.
+   In particular, it may be heap-allocated.
+
+   .. versionadded:: next
+
+.. c:function:: int PyModule_Exec(PyObject *module)
+
+   Execute the :c:data:`Py_mod_exec` slot(s) of the given *module*.
+
+   On success, return 0.
+   On error, return -1 with an exception set.
+
+   For clarity: if *module* has no slots, for example if it uses
+   :ref:`legacy single-phase initialization `,
+   this function does nothing and returns 0.
+
+   .. versionadded:: next
+
+
+
+.. _pymoduledef:
+
+Module definition struct
+------------------------
+
+Traditionally, extension modules were defined using a *module definition*
+as the “description” of how a module should be created.
+Rather than using an array of :ref:`slots ` directly,
+the definition has dedicated members for most common functionality,
+and allows additional slots as an extension mechanism.
+
+This way of defining modules is still available and there are no plans to
+remove it.
+
+.. c:type:: PyModuleDef
+
+   The module definition struct, which holds information needed to create
+   a module object.
+
+   This structure must be statically allocated (or be otherwise guaranteed
+   to be valid while any modules created from it exist).
+   Usually, there is only one variable of this type for each extension module
+   defined this way.
+
+   .. c:member:: PyModuleDef_Base m_base
+
+      Always initialize this member to :c:macro:`PyModuleDef_HEAD_INIT`:
+
+      .. c:namespace:: NULL
+
+      .. c:type:: PyModuleDef_Base
+
+         The type of :c:member:`!PyModuleDef.m_base`.
+
+      .. c:macro:: PyModuleDef_HEAD_INIT
+
+         The required initial value for :c:member:`!PyModuleDef.m_base`.
+
+   .. c:member:: const char *m_name
+
+      Corresponds to the :c:macro:`Py_mod_name` slot.
+
+   .. c:member:: const char *m_doc
+
+      Corresponds to the :c:macro:`Py_mod_doc` slot.
+      Setting this to ``NULL`` is equivalent to omitting the slot.
+
+   .. c:member:: Py_ssize_t m_size
+
+      Corresponds to the :c:macro:`Py_mod_state_size` slot.
+      Setting this to zero is equivalent to omitting the slot.
+
+      When using :ref:`legacy single-phase initialization `
+      or when creating modules dynamically using :c:func:`PyModule_Create`
+      or :c:func:`PyModule_Create2`, :c:member:`!m_size` may be set to -1.
+      This indicates that the module does not support sub-interpreters,
+      because it has global state.
+
+   .. c:member:: PyMethodDef *m_methods
+
+      Corresponds to the :c:macro:`Py_mod_methods` slot.
+      Setting this to ``NULL`` is equivalent to omitting the slot.
+
+   .. c:member:: PyModuleDef_Slot* m_slots
+
+      An array of additional slots, terminated by a ``{0, NULL}`` entry.
+
+      This array may not contain slots corresponding to :c:type:`PyModuleDef`
+      members.
+      For example, you cannot use :c:macro:`Py_mod_name` in :c:member:`!m_slots`;
+      the module name must be given as :c:member:`PyModuleDef.m_name`.
+
+      .. versionchanged:: 3.5
+
+         Prior to version 3.5, this member was always set to ``NULL``,
+         and was defined as:
+
+         .. c:member:: inquiry m_reload
+
+   .. c:member:: traverseproc m_traverse
+                 inquiry m_clear
+                 freefunc m_free
+
+      These members correspond to the :c:macro:`Py_mod_state_traverse`,
+      :c:macro:`Py_mod_state_clear`, and :c:macro:`Py_mod_state_free` slots,
+      respectively.
+
+      Setting these members to ``NULL`` is equivalent to omitting the
+      corresponding slots.
+
+      .. versionchanged:: 3.9
+
+         :c:member:`m_traverse`, :c:member:`m_clear` and :c:member:`m_free`
+         functions are no longer called before the module state is allocated.
+
+
+.. _moduledef-dynamic:
+
+The following API can be used to create modules from a :c:type:`!PyModuleDef`
+struct:

 .. c:function:: PyObject* PyModule_Create(PyModuleDef *def)
@@ -510,12 +834,13 @@ They are also used in
    useful for versioning. This may change in the future.

+.. _capi-module-support-functions:
+
 Support functions
 -----------------

-The following functions are provided to help initialize a module
-state.
-They are intended for a module's execution slots (:c:data:`Py_mod_exec`),
+The following functions are provided to help initialize a module object.
+They are intended for a module's execution slot (:c:data:`Py_mod_exec`),
 the initialization function for legacy
 :ref:`single-phase initialization `,
 or code that creates modules dynamically.
diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst
index 96353266ac7..76971c46c16 100644
--- a/Doc/c-api/object.rst
+++ b/Doc/c-api/object.rst
@@ -85,6 +85,35 @@ Object Protocol
    instead of the :func:`repr`.

+.. c:function:: void PyUnstable_Object_Dump(PyObject *op)
+
+   Dump an object *op* to ``stderr``. This should only be used for debugging.
+
+   The function tries to dump the object even if its memory has been corrupted:
+
+   * Information is written starting with fields that are the least likely to
+     crash when accessed.
+   * This function can be called without an :term:`attached thread state`, but
+     it's not recommended to do so: it can cause deadlocks.
+   * An object that does not belong to the current interpreter may be dumped,
+     but this may also cause crashes or unintended behavior.
+   * A heuristic is used to detect if the object's memory has been freed. In
+     that case, only the object's memory address is displayed, not its contents.
+   * The output format may change at any time.
+
+   Example of output:
+
+   .. code-block:: output
+
+      object address  : 0x7f80124702c0
+      object refcount : 2
+      object type     : 0x9902e0
+      object type name: str
+      object repr     : 'abcdef'
+
+   .. versionadded:: next
+
+
 .. c:function:: int PyObject_HasAttrWithError(PyObject *o, PyObject *attr_name)

    Returns ``1`` if *o* has the attribute *attr_name*, and ``0`` otherwise.
diff --git a/Doc/c-api/set.rst b/Doc/c-api/set.rst
index cba823aa027..09c0fb6b9c5 100644
--- a/Doc/c-api/set.rst
+++ b/Doc/c-api/set.rst
@@ -147,7 +147,7 @@ subtypes but not for instances of :class:`frozenset` or its subtypes.

    Return ``1`` if found and removed, ``0`` if not found (no action taken),
   and ``-1`` if an error is encountered.
Does not raise :exc:`KeyError` for missing keys. Raise a - :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`~frozenset.discard` + :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`~set.discard` method, this function does not automatically convert unhashable sets into temporary frozensets. Raise :exc:`SystemError` if *set* is not an instance of :class:`set` or its subtype. diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 414dfdc84e6..62f45def04f 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -280,6 +280,8 @@ Implementing functions and methods Name of the method. + A ``NULL`` *ml_name* marks the end of a :c:type:`!PyMethodDef` array. + .. c:member:: PyCFunction ml_meth Pointer to the C implementation. @@ -698,14 +700,12 @@ The following flags can be used with :c:member:`PyMemberDef.flags`: entry indicates an offset from the subclass-specific data, rather than from ``PyObject``. - Can only be used as part of :c:member:`Py_tp_members ` + Can only be used as part of the :c:data:`Py_tp_members` :c:type:`slot ` when creating a class using negative :c:member:`~PyType_Spec.basicsize`. It is mandatory in that case. - - This flag is only used in :c:type:`PyType_Slot`. - When setting :c:member:`~PyTypeObject.tp_members` during - class creation, Python clears it and sets + When setting :c:member:`~PyTypeObject.tp_members` from the slot during + class creation, Python clears the flag and sets :c:member:`PyMemberDef.offset` to the offset from the ``PyObject`` struct. .. index:: diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index b608f815160..1f57cc04f5d 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -283,8 +283,8 @@ Type Objects ``Py_TYPE(self)`` may be a *subclass* of the intended class, and subclasses are not necessarily defined in the same module as their superclass. See :c:type:`PyCMethod` to get the class that defines the method. - See :c:func:`PyType_GetModuleByDef` for cases when :c:type:`!PyCMethod` cannot - be used. + See :c:func:`PyType_GetModuleByToken` for cases when :c:type:`!PyCMethod` + cannot be used. .. versionadded:: 3.9 @@ -304,10 +304,10 @@ Type Objects .. versionadded:: 3.9 -.. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) +.. c:function:: PyObject* PyType_GetModuleByToken(PyTypeObject *type, const void *mod_token) - Find the first superclass whose module was created from - the given :c:type:`PyModuleDef` *def*, and return that module. + Find the first superclass whose module has the given + :ref:`module token `, and return that module. If no module is found, raises a :py:class:`TypeError` and returns ``NULL``. @@ -317,6 +317,23 @@ Type Objects and other places where a method's defining class cannot be passed using the :c:type:`PyCMethod` calling convention. + .. versionadded:: next + + +.. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) + + Find the first superclass whose module was created from the given + :c:type:`PyModuleDef` *def*, or whose :ref:`module token ` + is equal to *def*, and return that module. + + Note that modules created from a :c:type:`PyModuleDef` always have their + token set to the :c:type:`PyModuleDef`'s address. + In other words, this function is equivalent to + :c:func:`PyType_GetModuleByToken`, except that it: + + - returns a borrowed reference, and + - has a non-``void*`` argument type (which is a cosmetic difference in C). 
+ The returned reference is :term:`borrowed ` from *type*, and will be valid as long as you hold a reference to *type*. Do not release it with :c:func:`Py_DECREF` or similar. @@ -324,10 +341,10 @@ Type Objects .. versionadded:: 3.11 -.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) +.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *tp_token, PyTypeObject **result) Find the first superclass in *type*'s :term:`method resolution order` whose - :c:macro:`Py_tp_token` token is equal to the given one. + :c:macro:`Py_tp_token` token is equal to *tp_token*. * If found, set *\*result* to a new :term:`strong reference` to it and return ``1``. @@ -338,7 +355,7 @@ Type Objects The *result* argument may be ``NULL``, in which case *\*result* is not set. Use this if you need only the return value. - The *token* argument may not be ``NULL``. + The *tp_token* argument may not be ``NULL``. .. versionadded:: 3.14 @@ -383,8 +400,8 @@ The following functions and structs are used to create The *bases* argument can be used to specify base classes; it can either be only one class or a tuple of classes. - If *bases* is ``NULL``, the *Py_tp_bases* slot is used instead. - If that also is ``NULL``, the *Py_tp_base* slot is used instead. + If *bases* is ``NULL``, the :c:data:`Py_tp_bases` slot is used instead. + If that also is ``NULL``, the :c:data:`Py_tp_base` slot is used instead. If that also is ``NULL``, the new type derives from :class:`object`. The *module* argument can be used to record the module in which the new @@ -590,9 +607,9 @@ The following functions and structs are used to create :c:type:`PyAsyncMethods` with an added ``Py_`` prefix. For example, use: - * ``Py_tp_dealloc`` to set :c:member:`PyTypeObject.tp_dealloc` - * ``Py_nb_add`` to set :c:member:`PyNumberMethods.nb_add` - * ``Py_sq_length`` to set :c:member:`PySequenceMethods.sq_length` + * :c:data:`Py_tp_dealloc` to set :c:member:`PyTypeObject.tp_dealloc` + * :c:data:`Py_nb_add` to set :c:member:`PyNumberMethods.nb_add` + * :c:data:`Py_sq_length` to set :c:member:`PySequenceMethods.sq_length` An additional slot is supported that does not correspond to a :c:type:`!PyTypeObject` struct field: @@ -611,7 +628,7 @@ The following functions and structs are used to create If it is not possible to switch to a ``MANAGED`` flag (for example, for vectorcall or to support Python older than 3.12), specify the - offset in :c:member:`Py_tp_members `. + offset in :c:data:`Py_tp_members`. See :ref:`PyMemberDef documentation ` for details. @@ -638,8 +655,8 @@ The following functions and structs are used to create under the :ref:`limited API `. .. versionchanged:: 3.14 - The field :c:member:`~PyTypeObject.tp_vectorcall` can now set - using ``Py_tp_vectorcall``. See the field's documentation + The field :c:member:`~PyTypeObject.tp_vectorcall` can now be set + using :c:data:`Py_tp_vectorcall`. See the field's documentation for details. .. c:member:: void *pfunc @@ -649,7 +666,7 @@ The following functions and structs are used to create *pfunc* values may not be ``NULL``, except for the following slots: - * ``Py_tp_doc`` + * :c:data:`Py_tp_doc` * :c:data:`Py_tp_token` (for clarity, prefer :c:data:`Py_TP_USE_SPEC` rather than ``NULL``) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 34d19acdf17..49fe02d919d 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -2273,7 +2273,7 @@ and :c:data:`PyType_Type` effectively act as defaults.) 
This field should be set to ``NULL`` and treated as read-only.
   Python will fill it in when the type is
   :c:func:`initialized `.

-   For dynamically created classes, the ``Py_tp_bases``
+   For dynamically created classes, the :c:data:`Py_tp_bases`
    :c:type:`slot ` can be used instead of the *bases* argument
    of :c:func:`PyType_FromSpecWithBases`.
    The argument form is preferred.
diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst
index 3b07b5fbed5..7eb9f0b54ab 100644
--- a/Doc/c-api/veryhigh.rst
+++ b/Doc/c-api/veryhigh.rst
@@ -396,3 +396,43 @@ Available start symbols

    * :pep:`484`

    .. versionadded:: 3.8
+
+
+Stack Effects
+^^^^^^^^^^^^^
+
+.. seealso::
+   :py:func:`dis.stack_effect`
+
+
+.. c:macro:: PY_INVALID_STACK_EFFECT
+
+   Sentinel value representing an invalid stack effect.
+
+   This is currently equivalent to ``INT_MAX``.
+
+   .. versionadded:: 3.8
+
+
+.. c:function:: int PyCompile_OpcodeStackEffect(int opcode, int oparg)
+
+   Compute the stack effect of *opcode* with argument *oparg*.
+
+   On success, this function returns the stack effect; on failure, this
+   returns :c:macro:`PY_INVALID_STACK_EFFECT`.
+
+   .. versionadded:: 3.4
+
+
+.. c:function:: int PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump)
+
+   Similar to :c:func:`PyCompile_OpcodeStackEffect`, but also takes the
+   effect of jumping into account.
+
+   If *jump* is ``0``, this will not include the stack effect of jumping, but
+   if *jump* is ``1`` or ``-1``, this will include it.
+
+   On success, this function returns the stack effect; on failure, this
+   returns :c:macro:`PY_INVALID_STACK_EFFECT`.
+
+   .. versionadded:: 3.8
diff --git a/Doc/conf.py b/Doc/conf.py
index 0f1412d1007..a4275835059 100644
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -361,7 +361,7 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, document class [howto/manual]).
-_stdauthor = 'Guido van Rossum and the Python development team'
+_stdauthor = 'The Python development team'
 latex_documents = [
     ('c-api/index', 'c-api.tex', 'The Python/C API', _stdauthor, 'manual'),
     (
diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat
index 48f4f4919e8..64399f6ab1f 100644
--- a/Doc/data/refcounts.dat
+++ b/Doc/data/refcounts.dat
@@ -1472,6 +1472,9 @@ PyModule_Create2:PyObject*::+1:
 PyModule_Create2:PyModuleDef*:def::
 PyModule_Create2:int:module_api_version::

+PyModule_Exec:int:::
+PyModule_Exec:PyObject*:module:0:
+
 PyModule_ExecDef:int:::
 PyModule_ExecDef:PyObject*:module:0:
 PyModule_ExecDef:PyModuleDef*:def::
@@ -1485,6 +1488,10 @@ PyModule_FromDefAndSpec2:PyModuleDef*:def::
 PyModule_FromDefAndSpec2:PyObject*:spec:0:
 PyModule_FromDefAndSpec2:int:module_api_version::

+PyModule_FromSlotsAndSpec:PyObject*::+1:
+PyModule_FromSlotsAndSpec:const PyModuleDef_Slot *:slots::
+PyModule_FromSlotsAndSpec:PyObject*:spec:0:
+
 PyModule_GetDef:PyModuleDef*::0:
 PyModule_GetDef:PyObject*:module:0:

@@ -1506,6 +1513,14 @@ PyModule_GetNameObject:PyObject*:module:0:
 PyModule_GetState:void*:::
 PyModule_GetState:PyObject*:module:0:

+PyModule_GetStateSize:int:::
+PyModule_GetStateSize:PyObject*:module:0:
+PyModule_GetStateSize:Py_ssize_t*:result::
+
+PyModule_GetToken:int:::
+PyModule_GetToken:PyObject*:module:0:
+PyModule_GetToken:void**:result::
+
 PyModule_New:PyObject*::+1:
 PyModule_New:char*:name::

@@ -2412,6 +2427,10 @@ PyType_GetFlags:PyTypeObject*:type:0:
 PyType_GetName:PyObject*::+1:
 PyType_GetName:PyTypeObject*:type:0:

+PyType_GetModuleByToken:PyObject*::+1:
+PyType_GetModuleByToken:PyTypeObject*:type:0:
+PyType_GetModuleByToken:const void*:mod_token::
+
 PyType_GetModuleByDef:PyObject*::0:
 PyType_GetModuleByDef:PyTypeObject*:type:0:
 PyType_GetModuleByDef:PyModuleDef*:def::
diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat
index 5cbf3771950..9c5fdcefaf8 100644
--- a/Doc/data/stable_abi.dat
+++ b/Doc/data/stable_abi.dat
@@ -160,6 +160,7 @@ func,PyDict_Merge,3.2,,
 func,PyDict_MergeFromSeq2,3.2,,
 func,PyDict_New,3.2,,
 func,PyDict_Next,3.2,,
+func,PyDict_SetDefaultRef,3.15,,
 func,PyDict_SetItem,3.2,,
 func,PyDict_SetItemString,3.2,,
 func,PyDict_Size,3.2,,
@@ -463,6 +464,7 @@ data,PyMethodDescr_Type,3.2,,
 type,PyModuleDef,3.2,,full-abi
 type,PyModuleDef_Base,3.2,,full-abi
 func,PyModuleDef_Init,3.5,,
+type,PyModuleDef_Slot,3.5,,full-abi
 data,PyModuleDef_Type,3.5,,
 func,PyModule_Add,3.13,,
 func,PyModule_AddFunctions,3.7,,
@@ -914,6 +916,7 @@ func,Py_GetPlatform,3.2,,
 func,Py_GetRecursionLimit,3.2,,
 func,Py_GetVersion,3.2,,
 data,Py_HasFileSystemDefaultEncoding,3.2,,
+func,Py_IS_TYPE,3.15,,
 func,Py_IncRef,3.2,,
 func,Py_Initialize,3.2,,
 func,Py_InitializeEx,3.2,,
@@ -935,6 +938,8 @@ func,Py_REFCNT,3.14,,
 macro,Py_RELATIVE_OFFSET,3.12,,
 func,Py_ReprEnter,3.2,,
 func,Py_ReprLeave,3.2,,
+func,Py_SET_SIZE,3.15,,
+func,Py_SIZE,3.15,,
 func,Py_SetProgramName,3.2,,
 func,Py_SetPythonHome,3.2,,
 func,Py_SetRecursionLimit,3.2,,
@@ -979,8 +984,12 @@ macro,Py_bf_releasebuffer,3.11,,
 type,Py_buffer,3.11,,full-abi
 type,Py_intptr_t,3.2,,
 macro,Py_mod_abi,3.15,,
+macro,Py_mod_create,3.5,,
 macro,Py_mod_doc,3.15,,
+macro,Py_mod_exec,3.5,,
+macro,Py_mod_gil,3.13,,
 macro,Py_mod_methods,3.15,,
+macro,Py_mod_multiple_interpreters,3.12,,
 macro,Py_mod_name,3.15,,
 macro,Py_mod_state_clear,3.15,,
 macro,Py_mod_state_free,3.15,,
diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst
index dee92312169..f9b65643dfe 100644
--- a/Doc/extending/extending.rst
+++ b/Doc/extending/extending.rst
@@
-426,7 +426,7 @@ A pointer to the module definition must be returned via :c:func:`PyModuleDef_Ini so that the import machinery can create the module and store it in ``sys.modules``. When embedding Python, the :c:func:`!PyInit_spam` function is not called -automatically unless there's an entry in the :c:data:`!PyImport_Inittab` table. +automatically unless there's an entry in the :c:data:`PyImport_Inittab` table. To add the module to the initialization table, use :c:func:`PyImport_AppendInittab`, optionally followed by an import of the module:: diff --git a/Doc/howto/a-conceptual-overview-of-asyncio.rst b/Doc/howto/a-conceptual-overview-of-asyncio.rst index af1e39480cc..3adfedbf410 100644 --- a/Doc/howto/a-conceptual-overview-of-asyncio.rst +++ b/Doc/howto/a-conceptual-overview-of-asyncio.rst @@ -1,7 +1,7 @@ .. _a-conceptual-overview-of-asyncio: **************************************** -A Conceptual Overview of :mod:`!asyncio` +A conceptual overview of :mod:`!asyncio` **************************************** This :ref:`HOWTO ` article seeks to help you build a sturdy mental @@ -37,15 +37,15 @@ In part 1, we'll cover the main, high-level building blocks of :mod:`!asyncio`: the event loop, coroutine functions, coroutine objects, tasks, and ``await``. ========== -Event Loop +Event loop ========== Everything in :mod:`!asyncio` happens relative to the event loop. -It's the star of the show. +It's the star of the show, but prefers to work behind the scenes, managing +and coordinating resources. It's like an orchestra conductor. -It's behind the scenes managing resources. Some power is explicitly granted to it, but a lot of its ability to get things -done comes from the respect and cooperation of its worker bees. +done comes from the respect and cooperation of its band members. In more technical terms, the event loop contains a collection of jobs to be run. Some jobs are added directly by you, and some indirectly by :mod:`!asyncio`. @@ -59,7 +59,7 @@ This process repeats indefinitely, with the event loop cycling endlessly onwards. If there are no more jobs pending execution, the event loop is smart enough to rest and avoid needlessly wasting CPU cycles, and will come back when there's -more work to be done. +more work to be done, such as when I/O operations complete or timers expire. Effective execution relies on jobs sharing well and cooperating; a greedy job could hog control and leave the other jobs to starve, rendering the overall @@ -170,14 +170,17 @@ Roughly speaking, :ref:`tasks ` are coroutines (not coroutine functions) tied to an event loop. A task also maintains a list of callback functions whose importance will become clear in a moment when we discuss :keyword:`await`. -The recommended way to create tasks is via :func:`asyncio.create_task`. Creating a task automatically schedules it for execution (by adding a callback to run it in the event loop's to-do list, that is, collection of jobs). +The recommended way to create tasks is via :func:`asyncio.create_task`. -Since there's only one event loop (in each thread), :mod:`!asyncio` takes care of -associating the task with the event loop for you. As such, there's no need -to specify the event loop. +:mod:`!asyncio` automatically associates tasks with the event loop for you. +This automatic association was purposely designed into :mod:`!asyncio` for +the sake of simplicity. +Without it, you'd have to keep track of the event loop object and pass it to +any coroutine function that wants to create tasks, adding redundant clutter +to your code. 
::
@@ -250,6 +253,10 @@ different ways::

 In a crucial way, the behavior of ``await`` depends on the type of object
 being awaited.

+^^^^^^^^^^^^^^
+Awaiting tasks
+^^^^^^^^^^^^^^
+
 Awaiting a task will cede control from the current task or coroutine to the
 event loop. In the process of relinquishing control, a few important things
 happen.
@@ -281,6 +288,10 @@ This is a basic, yet reliable mental model.
 In practice, the control handoffs are slightly more complex, but not by much.
 In part 2, we'll walk through the details that make this possible.

+^^^^^^^^^^^^^^^^^^^
+Awaiting coroutines
+^^^^^^^^^^^^^^^^^^^
+
 **Unlike tasks, awaiting a coroutine does not hand control back to the event
 loop!**

 Wrapping a coroutine in a task first, then awaiting that would cede
@@ -347,8 +358,10 @@ The design intentionally trades off some conceptual clarity around usage of
 ``await`` for improved performance.
 Each time a task is awaited, control needs to be passed all the way up the
 call stack to the event loop.
-That might sound minor, but in a large program with many ``await`` statements and a deep
-call stack, that overhead can add up to a meaningful performance drag.
+Then, the event loop needs to manage its internal state and work through
+its processing logic to resume the next job.
+That might sound minor, but in a large program with many ``await``\ s, that
+overhead can add up to a non-negligible performance drag.

 ------------------------------------------------
 A conceptual overview part 2: the nuts and bolts
------------------------------------------------
@@ -364,7 +377,8 @@ and how to make your own asynchronous operators.
 The inner workings of coroutines
 ================================

-:mod:`!asyncio` leverages four components to pass around control.
+:mod:`!asyncio` leverages four components of Python to pass
+around control.

 :meth:`coroutine.send(arg) ` is the method used to start or resume
 a coroutine.
@@ -448,9 +462,9 @@ That might sound odd to you. You might be thinking:
    That causes the error: ``SyntaxError: yield from not allowed in a coroutine.``
    This was intentionally designed for the sake of simplicity -- mandating only
    one way of using coroutines.
+   Despite that, ``yield from`` and ``await`` effectively do the same thing.
    Initially ``yield`` was barred as well, but was re-accepted to allow for
    async generators.
-   Despite that, ``yield from`` and ``await`` effectively do the same thing.

 =======
 Futures
diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst
index 5647ab2d87c..83eba8cfea3 100644
--- a/Doc/howto/free-threading-extensions.rst
+++ b/Doc/howto/free-threading-extensions.rst
@@ -45,9 +45,12 @@ single-phase initialization.
 Multi-Phase Initialization
 ..........................

-Extensions that use multi-phase initialization (i.e.,
-:c:func:`PyModuleDef_Init`) should add a :c:data:`Py_mod_gil` slot in the
-module definition. If your extension supports older versions of CPython,
+Extensions that use :ref:`multi-phase initialization `
+(functions like :c:func:`PyModuleDef_Init`,
+a :c:func:`PyModExport_* ` export hook,
+or :c:func:`PyModule_FromSlotsAndSpec`) should add a
+:c:data:`Py_mod_gil` slot in the module definition.
+If your extension supports older versions of CPython,
 you should guard the slot with a :c:data:`PY_VERSION_HEX` check.

 ::
@@ -60,18 +63,12 @@ you should guard the slot with a :c:data:`PY_VERSION_HEX` check.

     static PyModuleDef_Slot module_slots[] = {
         ...
     #if PY_VERSION_HEX >= 0x030D0000
         {Py_mod_gil, Py_MOD_GIL_NOT_USED},
     #endif
         {0, NULL}
     };

-    static struct PyModuleDef moduledef = {
-        PyModuleDef_HEAD_INIT,
-        .m_slots = module_slots,
-        ...
- }; - Single-Phase Initialization ........................... -Extensions that use single-phase initialization (i.e., -:c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to +Extensions that use legacy :ref:`single-phase initialization ` +(that is, :c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to indicate that they support running with the GIL disabled. The function is only defined in the free-threaded build, so you should guard the call with ``#ifdef Py_GIL_DISABLED`` to avoid compilation errors in the regular build. diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index e4df7a787a2..380c2be0495 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -11,9 +11,7 @@ available processing power by running threads in parallel on available CPU cores While not all software will benefit from this automatically, programs designed with threading in mind will run faster on multi-core hardware. -The free-threaded mode is working and continues to be improved, but -there is some additional overhead in single-threaded workloads compared -to the regular build. Additionally, third-party packages, in particular ones +Some third-party packages, in particular ones with an :term:`extension module`, may not be ready for use in a free-threaded build, and will re-enable the :term:`GIL`. @@ -101,63 +99,42 @@ This section describes known limitations of the free-threaded CPython build. Immortalization --------------- -The free-threaded build of the 3.13 release makes some objects :term:`immortal`. +In the free-threaded build, some objects are :term:`immortal`. Immortal objects are not deallocated and have reference counts that are never modified. This is done to avoid reference count contention that would prevent efficient multi-threaded scaling. -An object will be made immortal when a new thread is started for the first time -after the main thread is running. The following objects are immortalized: +As of the 3.14 release, immortalization is limited to: -* :ref:`function ` objects declared at the module level -* :ref:`method ` descriptors -* :ref:`code ` objects -* :term:`module` objects and their dictionaries -* :ref:`classes ` (type objects) - -Because immortal objects are never deallocated, applications that create many -objects of these types may see increased memory usage under Python 3.13. This -has been addressed in the 3.14 release, where the aforementioned objects use -deferred reference counting to avoid reference count contention. - -Additionally, numeric and string literals in the code as well as strings -returned by :func:`sys.intern` are also immortalized in the 3.13 release. This -behavior is part of the 3.14 release as well and it is expected to remain in -future free-threaded builds. +* Code constants: numeric literals, string literals, and tuple literals + composed of other constants. +* Strings interned by :func:`sys.intern`. Frame objects ------------- -It is not safe to access :ref:`frame ` objects from other -threads and doing so may cause your program to crash . This means that -:func:`sys._current_frames` is generally not safe to use in a free-threaded -build. Functions like :func:`inspect.currentframe` and :func:`sys._getframe` -are generally safe as long as the resulting frame object is not passed to -another thread. 
+It is not safe to access :attr:`frame.f_locals` from a :ref:`frame ` +object if that frame is currently executing in another thread, and doing so may +crash the interpreter. + Iterators --------- -Sharing the same iterator object between multiple threads is generally not -safe and threads may see duplicate or missing elements when iterating or crash -the interpreter. +It is generally not thread-safe to access the same iterator object from +multiple threads concurrently, and threads may see duplicate or missing +elements. Single-threaded performance --------------------------- The free-threaded build has additional overhead when executing Python code -compared to the default GIL-enabled build. In 3.13, this overhead is about -40% on the `pyperformance `_ suite. -Programs that spend most of their time in C extensions or I/O will see -less of an impact. The largest impact is because the specializing adaptive -interpreter (:pep:`659`) is disabled in the free-threaded build. - -The specializing adaptive interpreter has been re-enabled in a thread-safe way -in the 3.14 release. The performance penalty on single-threaded code in -free-threaded mode is now roughly 5-10%, depending on the platform and C -compiler used. +compared to the default GIL-enabled build. The amount of overhead depends +on the workload and hardware. On the pyperformance benchmark suite, the +average overhead ranges from about 1% on macOS aarch64 to 8% on x86-64 Linux +systems. Behavioral changes diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 053558e3890..552514063c9 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -4,7 +4,7 @@ Functional Programming HOWTO ******************************** -:Author: A. M. Kuchling +:Author: \A. M. Kuchling :Release: 0.32 In this document, we'll take a tour of Python's features suitable for diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index 7da6dc8a397..6092c75f48f 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -353,7 +353,7 @@ garbage collection protocol. That is, heap types should: - Have the :c:macro:`Py_TPFLAGS_HAVE_GC` flag. -- Define a traverse function using ``Py_tp_traverse``, which +- Define a traverse function using :c:data:`Py_tp_traverse`, which visits the type (e.g. using ``Py_VISIT(Py_TYPE(self))``). Please refer to the documentation of diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index 254fe729355..243cc27bac7 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -352,6 +352,8 @@ If you don't include such a comment, the default encoding used will be UTF-8 as already mentioned. See also :pep:`263` for more information. +.. _unicode-properties: + Unicode Properties ------------------ diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index d79d1abe8d0..4e77d2cb407 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -15,7 +15,7 @@ Introduction You may also find useful the following article on fetching web resources with Python: - * `Basic Authentication `_ + * `Basic Authentication `__ A tutorial on *Basic Authentication*, with examples in Python. diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 5a8f0bde2e3..71c4f094886 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -767,9 +767,9 @@ how the command-line arguments should be handled. 
The supplied actions are: Namespace(foo=42) * ``'store_true'`` and ``'store_false'`` - These are special cases of - ``'store_const'`` used for storing the values ``True`` and ``False`` - respectively. In addition, they create default values of ``False`` and - ``True`` respectively:: + ``'store_const'`` that respectively store the values ``True`` and ``False`` + with default values of ``False`` and + ``True``:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('--foo', action='store_true') @@ -789,8 +789,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--foo 1 --foo 2'.split()) Namespace(foo=['0', '1', '2']) -* ``'append_const'`` - This stores a list, and appends the value specified by - the const_ keyword argument to the list; note that the const_ keyword +* ``'append_const'`` - This appends the value specified by + the const_ keyword argument to a list; note that the const_ keyword argument defaults to ``None``. The ``'append_const'`` action is typically useful when multiple arguments need to store constants to the same list. For example:: @@ -801,8 +801,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--str --int'.split()) Namespace(types=[, ]) -* ``'extend'`` - This stores a list and appends each item from the multi-value - argument list to it. +* ``'extend'`` - This appends each item from a multi-value + argument to a list. The ``'extend'`` action is typically used with the nargs_ keyword argument value ``'+'`` or ``'*'``. Note that when nargs_ is ``None`` (the default) or ``'?'``, each @@ -816,7 +816,7 @@ how the command-line arguments should be handled. The supplied actions are: .. versionadded:: 3.8 -* ``'count'`` - This counts the number of times a keyword argument occurs. For +* ``'count'`` - This counts the number of times an argument occurs. For example, this is useful for increasing verbosity levels:: >>> parser = argparse.ArgumentParser() @@ -1322,8 +1322,12 @@ attribute is determined by the ``dest`` keyword argument of For optional argument actions, the value of ``dest`` is normally inferred from the option strings. :class:`ArgumentParser` generates the value of ``dest`` by -taking the first long option string and stripping away the initial ``--`` -string. If no long option strings were supplied, ``dest`` will be derived from +taking the first double-dash long option string and stripping away the initial +``-`` characters. +If no double-dash long option strings were supplied, ``dest`` will be derived +from the first single-dash long option string by stripping the initial ``-`` +character. +If no long option strings were supplied, ``dest`` will be derived from the first short option string by stripping the initial ``-`` character. Any internal ``-`` characters will be converted to ``_`` characters to make sure the string is a valid attribute name. 
The examples below illustrate this @@ -1331,11 +1335,12 @@ behavior:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('-f', '--foo-bar', '--foo') + >>> parser.add_argument('-q', '-quz') >>> parser.add_argument('-x', '-y') - >>> parser.parse_args('-f 1 -x 2'.split()) - Namespace(foo_bar='1', x='2') - >>> parser.parse_args('--foo 1 -y 2'.split()) - Namespace(foo_bar='1', x='2') + >>> parser.parse_args('-f 1 -q 2 -x 3'.split()) + Namespace(foo_bar='1', quz='2', x='3') + >>> parser.parse_args('--foo 1 -quz 2 -y 3'.split()) + Namespace(foo_bar='1', quz='2', x='2') ``dest`` allows a custom attribute name to be provided:: @@ -1344,6 +1349,9 @@ behavior:: >>> parser.parse_args('--foo XXX'.split()) Namespace(bar='XXX') +.. versionchanged:: next + Single-dash long option now takes precedence over short options. + .. _deprecated: @@ -1437,8 +1445,18 @@ this API may be passed as the ``action`` parameter to >>> parser.parse_args(['--no-foo']) Namespace(foo=False) + Single-dash long options are also supported. + For example, negative option ``-nofoo`` is automatically added for + positive option ``-foo``. + But no additional options are added for short options such as ``-f``. + .. versionadded:: 3.9 + .. versionchanged:: next + Added support for single-dash options. + + Added support for alternate prefix_chars_. + The parse_args() method ----------------------- diff --git a/Doc/library/bdb.rst b/Doc/library/bdb.rst index c7a3e0c596b..a3c6da7a6d6 100644 --- a/Doc/library/bdb.rst +++ b/Doc/library/bdb.rst @@ -236,7 +236,7 @@ The :mod:`bdb` module also defines two classes: Normally derived classes don't override the following methods, but they may if they want to redefine the definition of stopping and breakpoints. - .. method:: is_skipped_line(module_name) + .. method:: is_skipped_module(module_name) Return ``True`` if *module_name* matches any skip pattern. diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 9a8108d882e..4e0db485e06 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1209,7 +1209,7 @@ If a new entry overwrites an existing entry, the original insertion position is changed and moved to the end:: class LastUpdatedOrderedDict(OrderedDict): - 'Store items in the order the keys were last added' + 'Store items in the order that the keys were last updated.' def __setitem__(self, key, value): super().__setitem__(key, value) diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index 05937775699..621aa23ecc8 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -2109,20 +2109,20 @@ to work with the :class:`Decimal` class:: Decimal FAQ ----------- -Q. It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to +Q: It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to minimize typing when using the interactive interpreter? -A. Some users abbreviate the constructor to just a single letter: +A: Some users abbreviate the constructor to just a single letter: >>> D = decimal.Decimal >>> D('1.23') + D('3.45') Decimal('4.68') -Q. In a fixed-point application with two decimal places, some inputs have many +Q: In a fixed-point application with two decimal places, some inputs have many places and need to be rounded. Others are not supposed to have excess digits and need to be validated. What methods should be used? -A. The :meth:`~Decimal.quantize` method rounds to a fixed number of decimal places. If +A: The :meth:`~Decimal.quantize` method rounds to a fixed number of decimal places. 
If the :const:`Inexact` trap is set, it is also useful for validation: >>> TWOPLACES = Decimal(10) ** -2 # same as Decimal('0.01') @@ -2140,10 +2140,10 @@ the :const:`Inexact` trap is set, it is also useful for validation: ... Inexact: None -Q. Once I have valid two place inputs, how do I maintain that invariant +Q: Once I have valid two place inputs, how do I maintain that invariant throughout an application? -A. Some operations like addition, subtraction, and multiplication by an integer +A: Some operations like addition, subtraction, and multiplication by an integer will automatically preserve fixed point. Other operations, like division and non-integer multiplication, will change the number of decimal places and need to be followed up with a :meth:`~Decimal.quantize` step: @@ -2175,21 +2175,21 @@ to handle the :meth:`~Decimal.quantize` step: >>> div(b, a) Decimal('0.03') -Q. There are many ways to express the same value. The numbers ``200``, +Q: There are many ways to express the same value. The numbers ``200``, ``200.000``, ``2E2``, and ``.02E+4`` all have the same value at various precisions. Is there a way to transform them to a single recognizable canonical value? -A. The :meth:`~Decimal.normalize` method maps all equivalent values to a single +A: The :meth:`~Decimal.normalize` method maps all equivalent values to a single representative: >>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) >>> [v.normalize() for v in values] [Decimal('2E+2'), Decimal('2E+2'), Decimal('2E+2'), Decimal('2E+2')] -Q. When does rounding occur in a computation? +Q: When does rounding occur in a computation? -A. It occurs *after* the computation. The philosophy of the decimal +A: It occurs *after* the computation. The philosophy of the decimal specification is that numbers are considered exact and are created independent of the current context. They can even have greater precision than current context. Computations process with those @@ -2207,10 +2207,10 @@ applied to the *result* of the computation:: >>> pi + 0 - Decimal('0.00005') # Intermediate values are rounded Decimal('3.1416') -Q. Some decimal values always print with exponential notation. Is there a way +Q: Some decimal values always print with exponential notation. Is there a way to get a non-exponential representation? -A. For some values, exponential notation is the only way to express the number +A: For some values, exponential notation is the only way to express the number of significant places in the coefficient. For example, expressing ``5.0E+3`` as ``5000`` keeps the value constant but cannot show the original's two-place significance. @@ -2225,9 +2225,9 @@ value unchanged: >>> remove_exponent(Decimal('5E+3')) Decimal('5000') -Q. Is there a way to convert a regular float to a :class:`Decimal`? +Q: Is there a way to convert a regular float to a :class:`Decimal`? -A. Yes, any binary floating-point number can be exactly expressed as a +A: Yes, any binary floating-point number can be exactly expressed as a Decimal though an exact conversion may take more precision than intuition would suggest: @@ -2236,19 +2236,19 @@ suggest: >>> Decimal(math.pi) Decimal('3.141592653589793115997963468544185161590576171875') -Q. Within a complex calculation, how can I make sure that I haven't gotten a +Q: Within a complex calculation, how can I make sure that I haven't gotten a spurious result because of insufficient precision or rounding anomalies? -A. The decimal module makes it easy to test results.
A best practice is to +A: The decimal module makes it easy to test results. A best practice is to re-run calculations using greater precision and with various rounding modes. Widely differing results indicate insufficient precision, rounding mode issues, ill-conditioned inputs, or a numerically unstable algorithm. -Q. I noticed that context precision is applied to the results of operations but +Q: I noticed that context precision is applied to the results of operations but not to the inputs. Is there anything to watch out for when mixing values of different precisions? -A. Yes. The principle is that all values are considered to be exact and so is +A: Yes. The principle is that all values are considered to be exact and so is the arithmetic on those values. Only the results are rounded. The advantage for inputs is that "what you type is what you get". A disadvantage is that the results can look odd if you forget that the inputs haven't been rounded: @@ -2276,9 +2276,9 @@ Alternatively, inputs can be rounded upon creation using the >>> Context(prec=5, rounding=ROUND_DOWN).create_decimal('1.2345678') Decimal('1.2345') -Q. Is the CPython implementation fast for large numbers? +Q: Is the CPython implementation fast for large numbers? -A. Yes. In the CPython and PyPy3 implementations, the C/CFFI versions of +A: Yes. In the CPython and PyPy3 implementations, the C/CFFI versions of the decimal module integrate the high speed `libmpdec `_ library for arbitrary precision correctly rounded decimal floating-point arithmetic [#]_. diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 16d42c010f6..b5e3a84b455 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -978,6 +978,12 @@ their subgroups based on the types of the contained exceptions. raises a :exc:`TypeError` if any contained exception is not an :exc:`Exception` subclass. + .. impl-detail:: + + The ``excs`` parameter may be any sequence, but lists and tuples are + processed more efficiently than other sequences. For optimal performance, + pass a tuple as ``excs``. + .. attribute:: message The ``msg`` argument to the constructor. This is a read-only attribute. diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 97136b23408..221c0712c7c 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -57,6 +57,10 @@ The :mod:`functools` module defines the following functions: another thread makes an additional call before the initial call has been completed and cached. + Call-once behavior is not guaranteed because locks are not held during the + function call. Another call with the same arguments may therefore + begin while the first call is still running. + .. versionadded:: 3.9 diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 2ef5c4b35a2..79a8c38626f 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -108,10 +108,19 @@ The :mod:`gc` module provides the following functions: * ``uncollectable`` is the total number of objects which were found to be uncollectable (and were therefore moved to the :data:`garbage` - list) inside this generation. + list) inside this generation; + + * ``candidates`` is the total number of objects in this generation which were + considered for collection and traversed; + + * ``duration`` is the total time in seconds spent in collections for this + generation. .. versionadded:: 3.4 + .. versionchanged:: next + Added the ``duration`` and ``candidates`` fields. + ..
function:: set_threshold(threshold0, [threshold1, [threshold2]]) @@ -313,6 +322,12 @@ values but should not rebind them): "uncollectable": When *phase* is "stop", the number of objects that could not be collected and were put in :data:`garbage`. + "candidates": When *phase* is "stop", the total number of objects in this + generation which were considered for collection and traversed. + + "duration": When *phase* is "stop", the time in seconds spent in the + collection. + Applications can add their own callbacks to this list. The primary use cases are: @@ -325,6 +340,9 @@ values but should not rebind them): .. versionadded:: 3.3 + .. versionchanged:: next + Added the "duration" and "candidates" keys. + The following constants are provided for use with :func:`set_debug`: diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 186f99e9591..d2ff74822f9 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -506,7 +506,7 @@ Summation and product functions Roughly equivalent to:: - sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q))) + sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q, strict=True))) .. versionadded:: 3.8 diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 714207cb0ae..92605c57527 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -832,8 +832,8 @@ raising an exception. One difference from other Python queue implementations, is that :mod:`multiprocessing` queues serializes all objects that are put into them using :mod:`pickle`. -The object return by the get method is a re-created object that does not share memory -with the original object. +The object returned by the get method is a re-created object that does not share +memory with the original object. Note that one can also create a shared queue by using a manager object -- see :ref:`multiprocessing-managers`. @@ -890,7 +890,7 @@ For an example of the usage of queues for interprocess communication see :ref:`multiprocessing-examples`. -.. function:: Pipe([duplex]) +.. function:: Pipe(duplex=True) Returns a pair ``(conn1, conn2)`` of :class:`~multiprocessing.connection.Connection` objects representing the @@ -1577,12 +1577,22 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + + .. method:: get_value() + + Return the current value of the semaphore. + + Note that this may raise :exc:`NotImplementedError` on platforms like + macOS where ``sem_getvalue()`` is not implemented. + + .. method:: locked() Return a boolean indicating whether this object is locked right now. .. versionadded:: 3.14 + .. note:: On macOS, ``sem_timedwait`` is unsupported, so calling ``acquire()`` with diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst index 5bf36b13c6d..03ad50b2c5e 100644 --- a/Doc/library/profile.rst +++ b/Doc/library/profile.rst @@ -347,81 +347,6 @@ The statistical profiler produces output similar to deterministic profilers but .. _profile-cli: -:mod:`!profiling.sampling` Module Reference -======================================================= - -.. module:: profiling.sampling - :synopsis: Python statistical profiler. - -This section documents the programmatic interface for the :mod:`!profiling.sampling` module. -For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information -about statistical profiling, see :ref:`statistical-profiling` -..
function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False, native=False, gc=True) - - Sample a Python process and generate profiling data. - - This is the main entry point for statistical profiling. It creates a - :class:`SampleProfiler`, collects stack traces from the target process, and - outputs the results in the specified format. - - :param int pid: Process ID of the target Python process - :param int sort: Sort order for pstats output (default: 2 for cumulative time) - :param int sample_interval_usec: Sampling interval in microseconds (default: 100) - :param int duration_sec: Duration to sample in seconds (default: 10) - :param str filename: Output filename (None for stdout/default naming) - :param bool all_threads: Whether to sample all threads (default: False) - :param int limit: Maximum number of functions to display (default: None) - :param bool show_summary: Whether to show summary statistics (default: True) - :param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats') - :param bool realtime_stats: Whether to display real-time statistics (default: False) - :param bool native: Whether to include ```` frames (default: False) - :param bool gc: Whether to include ```` frames (default: True) - - :raises ValueError: If output_format is not 'pstats' or 'collapsed' - - Examples:: - - # Basic usage - profile process 1234 for 10 seconds - import profiling.sampling - profiling.sampling.sample(1234) - - # Profile with custom settings - profiling.sampling.sample(1234, duration_sec=30, sample_interval_usec=50, all_threads=True) - - # Generate collapsed stack traces for flamegraph.pl - profiling.sampling.sample(1234, output_format='collapsed', filename='profile.collapsed') - -.. class:: SampleProfiler(pid, sample_interval_usec, all_threads) - - Low-level API for the statistical profiler. - - This profiler uses periodic stack sampling to collect performance data - from running Python processes with minimal overhead. It can attach to - any Python process by PID and collect stack traces at regular intervals. - - :param int pid: Process ID of the target Python process - :param int sample_interval_usec: Sampling interval in microseconds - :param bool all_threads: Whether to sample all threads or just the main thread - - .. method:: sample(collector, duration_sec=10) - - Sample the target process for the specified duration. - - Collects stack traces from the target process at regular intervals - and passes them to the provided collector for processing. - - :param collector: Object that implements ``collect()`` method to process stack traces - :param int duration_sec: Duration to sample in seconds (default: 10) - - The method tracks sampling statistics and can display real-time - information if realtime_stats is enabled. - -.. seealso:: - - :ref:`sampling-profiler-cli` - Command-line interface documentation for the statistical profiler. - Deterministic Profiler Command Line Interface ============================================= diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index 75db832c546..780cc773403 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -246,6 +246,15 @@ Startup hooks if Python was compiled for a version of the library that supports it. +.. function:: get_pre_input_hook() + + Get the current pre-input hook function, or ``None`` if no pre-input hook + function has been set. 
This function only exists if Python was compiled + for a version of the library that supports it. + + .. versionadded:: next + + .. _readline-completion: Completion diff --git a/Doc/library/select.rst b/Doc/library/select.rst index e821cb01d94..62b5161fb80 100644 --- a/Doc/library/select.rst +++ b/Doc/library/select.rst @@ -115,7 +115,7 @@ The module defines the following: :ref:`kevent-objects` below for the methods supported by kevent objects. -.. function:: select(rlist, wlist, xlist[, timeout]) +.. function:: select(rlist, wlist, xlist, timeout=None) This is a straightforward interface to the Unix :c:func:`!select` system call. The first three arguments are iterables of 'waitable objects': either @@ -131,7 +131,7 @@ The module defines the following: platform-dependent. (It is known to work on Unix but not on Windows.) The optional *timeout* argument specifies a time-out in seconds; it may be a non-integer to specify fractions of seconds. - When the *timeout* argument is omitted the function blocks until + When the *timeout* argument is omitted or ``None``, the function blocks until at least one file descriptor is ready. A time-out value of zero specifies a poll and never blocks. diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 89bca9b5b20..b16cde34ec1 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -482,6 +482,9 @@ The AF_* and SOCK_* constants are now :class:`AddressFamily` and .. versionchanged:: 3.14 Added support for ``TCP_QUICKACK`` on Windows platforms when available. + .. versionchanged:: next + ``IPV6_HDRINCL`` was added. + .. data:: AF_CAN PF_CAN @@ -2092,11 +2095,8 @@ to sockets. Accepts any real number, not only integer or float. -.. method:: socket.setsockopt(level, optname, value: int) -.. method:: socket.setsockopt(level, optname, value: buffer) - :noindex: -.. method:: socket.setsockopt(level, optname, None, optlen: int) - :noindex: +.. method:: socket.setsockopt(level, optname, value: int | Buffer) + socket.setsockopt(level, optname, None, optlen: int) .. index:: pair: module; struct diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index fa0a5234720..7d30094963d 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -2959,16 +2959,16 @@ of TLS/SSL. Some new TLS 1.3 features are not yet available. Steve Kent :rfc:`RFC 4086: Randomness Requirements for Security <4086>` - Donald E., Jeffrey I. Schiller + Donald E. Eastlake, Jeffrey I. Schiller, Steve Crocker :rfc:`RFC 5280: Internet X.509 Public Key Infrastructure Certificate and Certificate Revocation List (CRL) Profile <5280>` - D. Cooper + David Cooper et al. :rfc:`RFC 5246: The Transport Layer Security (TLS) Protocol Version 1.2 <5246>` - T. Dierks et. al. + Tim Dierks and Eric Rescorla. :rfc:`RFC 6066: Transport Layer Security (TLS) Extensions <6066>` - D. Eastlake + Donald E. Eastlake `IANA TLS: Transport Layer Security (TLS) Parameters `_ IANA diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 3bcaba0b3e1..3899e5b59d8 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1994,10 +1994,16 @@ expression support in the :mod:`re` module). ``{}``. Each replacement field contains either the numeric index of a positional argument, or the name of a keyword argument. Returns a copy of the string where each replacement field is replaced with the string value of - the corresponding argument. + the corresponding argument. For example: + + .. 
doctest:: >>> "The sum of 1 + 2 is {0}".format(1+2) 'The sum of 1 + 2 is 3' + >>> "The sum of {a} + {b} is {answer}".format(answer=1+2, a=1, b=2) + 'The sum of 1 + 2 is 3' + >>> "{1} expects the {0} Inquisition!".format("Spanish", "Nobody") + 'Nobody expects the Spanish Inquisition!' See :ref:`formatstrings` for a description of the various formatting options that can be specified in format strings. @@ -2057,13 +2063,32 @@ expression support in the :mod:`re` module). from the `Alphabetic property defined in the section 4.10 'Letters, Alphabetic, and Ideographic' of the Unicode Standard `__. + For example: + + .. doctest:: + + >>> 'Letters and spaces'.isalpha() + False + >>> 'LettersOnly'.isalpha() + True + >>> 'µ'.isalpha() # non-ASCII characters can be considered alphabetical too + True + + See :ref:`unicode-properties`. .. method:: str.isascii() Return ``True`` if the string is empty or all characters in the string are ASCII, ``False`` otherwise. - ASCII characters have code points in the range U+0000-U+007F. + ASCII characters have code points in the range U+0000-U+007F. For example: + + .. doctest:: + + >>> 'ASCII characters'.isascii() + True + >>> 'µ'.isascii() + False .. versionadded:: 3.7 @@ -2073,9 +2098,18 @@ expression support in the :mod:`re` module). Return ``True`` if all characters in the string are decimal characters and there is at least one character, ``False`` otherwise. Decimal characters are those that can be used to form - numbers in base 10, e.g. U+0660, ARABIC-INDIC DIGIT + numbers in base 10, such as U+0660, ARABIC-INDIC DIGIT ZERO. Formally a decimal character is a character in the Unicode - General Category "Nd". + General Category "Nd". For example: + + .. doctest:: + + >>> '0123456789'.isdecimal() + True + >>> '٠١٢٣٤٥٦٧٨٩'.isdecimal() # Arabic-Indic digits zero to nine + True + >>> 'alphabetic'.isdecimal() + False .. method:: str.isdigit() @@ -2194,7 +2228,16 @@ expression support in the :mod:`re` module). Return a string which is the concatenation of the strings in *iterable*. A :exc:`TypeError` will be raised if there are any non-string values in *iterable*, including :class:`bytes` objects. The separator between - elements is the string providing this method. + elements is the string providing this method. For example: + + .. doctest:: + + >>> ', '.join(['spam', 'spam', 'spam']) + 'spam, spam, spam' + >>> '-'.join('Python') + 'P-y-t-h-o-n' + + See also :meth:`split`. .. method:: str.ljust(width, fillchar=' ', /) @@ -2408,6 +2451,8 @@ expression support in the :mod:`re` module). >>> " foo ".split(maxsplit=0) ['foo '] + See also :meth:`join`. + .. index:: single: universal newlines; str.splitlines method @@ -2611,6 +2656,8 @@ expression support in the :mod:`re` module). single: : (colon); in formatted string literal single: = (equals); for help in debugging using string literals +.. _stdtypes-fstrings: + Formatted String Literals (f-strings) ------------------------------------- @@ -2619,123 +2666,147 @@ Formatted String Literals (f-strings) The :keyword:`await` and :keyword:`async for` can be used in expressions within f-strings. .. versionchanged:: 3.8 - Added the debugging operator (``=``) + Added the debug specifier (``=``) .. versionchanged:: 3.12 Many restrictions on expressions within f-strings have been removed. Notably, nested strings, comments, and backslashes are now permitted. An :dfn:`f-string` (formally a :dfn:`formatted string literal`) is a string literal that is prefixed with ``f`` or ``F``. 
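For instance, a brief illustrative doctest (the variable name ``name`` is arbitrary; any expression may appear inside the braces)::

    >>> name = 'Python'
    >>> f'{name} has {len(name)} letters'
    'Python has 6 letters'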
-This type of string literal allows embedding arbitrary Python expressions -within *replacement fields*, which are delimited by curly brackets (``{}``). -These expressions are evaluated at runtime, similarly to :meth:`str.format`, -and are converted into regular :class:`str` objects. -For example: +This type of string literal allows embedding the results of arbitrary Python +expressions within *replacement fields*, which are delimited by curly +brackets (``{}``). +Each replacement field must contain an expression, optionally followed by: -.. doctest:: +* a *debug specifier* -- an equal sign (``=``); +* a *conversion specifier* -- ``!s``, ``!r`` or ``!a``; and/or +* a *format specifier* prefixed with a colon (``:``). - >>> who = 'nobody' - >>> nationality = 'Spanish' - >>> f'{who.title()} expects the {nationality} Inquisition!' - 'Nobody expects the Spanish Inquisition!' +See the :ref:`Lexical Analysis section on f-strings ` for details +on the syntax of these fields. -It is also possible to use a multi line f-string: +Debug specifier +^^^^^^^^^^^^^^^ -.. doctest:: +.. versionadded:: 3.8 - >>> f'''This is a string - ... on two lines''' - 'This is a string\non two lines' +If a debug specifier -- an equal sign (``=``) -- appears after the replacement +field expression, the resulting f-string will contain the expression's source, +the equal sign, and the value of the expression. +This is often useful for debugging:: -A single opening curly bracket, ``'{'``, marks a *replacement field* that -can contain any Python expression: + >>> number = 14.3 + >>> f'{number=}' + 'number=14.3' -.. doctest:: +Whitespace before, inside and after the expression, as well as whitespace +after the equal sign, is significant --- it is retained in the result:: - >>> nationality = 'Spanish' - >>> f'The {nationality} Inquisition!' - 'The Spanish Inquisition!' + >>> f'{ number - 4 = }' + ' number - 4 = 10.3' -To include a literal ``{`` or ``}``, use a double bracket: -.. doctest:: +Conversion specifier +^^^^^^^^^^^^^^^^^^^^ - >>> x = 42 - >>> f'{{x}} is {x}' - '{x} is 42' - -Functions can also be used, and :ref:`format specifiers `: - -.. doctest:: - - >>> from math import sqrt - >>> f'√2 \N{ALMOST EQUAL TO} {sqrt(2):.5f}' - '√2 ≈ 1.41421' - -Any non-string expression is converted using :func:`str`, by default: - -.. doctest:: +By default, the value of a replacement field expression is converted to +a string using :func:`str`:: >>> from fractions import Fraction - >>> f'{Fraction(1, 3)}' + >>> one_third = Fraction(1, 3) + >>> f'{one_third}' '1/3' -To use an explicit conversion, use the ``!`` (exclamation mark) operator, -followed by any of the valid formats, which are: +When a debug specifier but no format specifier is used, the default conversion +instead uses :func:`repr`:: -========== ============== -Conversion Meaning -========== ============== -``!a`` :func:`ascii` -``!r`` :func:`repr` -``!s`` :func:`str` -========== ============== + >>> f'{one_third = }' + 'one_third = Fraction(1, 3)' -For example: +The conversion can be specified explicitly using one of these specifiers: -.. doctest:: +* ``!s`` for :func:`str` +* ``!r`` for :func:`repr` +* ``!a`` for :func:`ascii` - >>> from fractions import Fraction - >>> f'{Fraction(1, 3)!s}' +For example:: + + >>> str(one_third) '1/3' - >>> f'{Fraction(1, 3)!r}' + >>> repr(one_third) 'Fraction(1, 3)' - >>> question = '¿Dónde está el Presidente?' - >>> print(f'{question!a}') - '\xbfD\xf3nde est\xe1 el Presidente?' 
-While debugging it may be helpful to see both the expression and its value, -by using the equals sign (``=``) after the expression. -This preserves spaces within the brackets, and can be used with a converter. -By default, the debugging operator uses the :func:`repr` (``!r``) conversion. -For example: + >>> f'{one_third!s} is {one_third!r}' + '1/3 is Fraction(1, 3)' -.. doctest:: + >>> string = "¡kočka 😸!" + >>> ascii(string) + "'\\xa1ko\\u010dka \\U0001f638!'" + + >>> f'{string = !a}' + "string = '\\xa1ko\\u010dka \\U0001f638!'" + + +Format specifier +^^^^^^^^^^^^^^^^ + +After the expression has been evaluated, and possibly converted using an +explicit conversion specifier, it is formatted using the :func:`format` function. +If the replacement field includes a *format specifier* introduced by a colon +(``:``), the specifier is passed to :func:`!format` as the second argument. +The result of :func:`!format` is then used as the final value for the +replacement field. For example:: >>> from fractions import Fraction - >>> calculation = Fraction(1, 3) - >>> f'{calculation=}' - 'calculation=Fraction(1, 3)' - >>> f'{calculation = }' - 'calculation = Fraction(1, 3)' - >>> f'{calculation = !s}' - 'calculation = 1/3' + >>> one_third = Fraction(1, 3) + >>> f'{one_third:.6f}' + '0.333333' + >>> f'{one_third:_^+10}' + '___+1/3___' + >>> f'{one_third!r:_^20}' + '___Fraction(1, 3)___' + >>> f'{one_third = :~>10}~' + 'one_third = ~~~~~~~1/3~' -Once the output has been evaluated, it can be formatted using a -:ref:`format specifier ` following a colon (``':'``). -After the expression has been evaluated, and possibly converted to a string, -the :meth:`!__format__` method of the result is called with the format specifier, -or the empty string if no format specifier is given. -The formatted result is then used as the final value for the replacement field. -For example: +.. _stdtypes-tstrings: -.. doctest:: +Template String Literals (t-strings) +------------------------------------ - >>> from fractions import Fraction - >>> f'{Fraction(1, 7):.6f}' - '0.142857' - >>> f'{Fraction(1, 7):_^+10}' - '___+1/7___' +A :dfn:`t-string` (formally a :dfn:`template string literal`) is +a string literal that is prefixed with ``t`` or ``T``. + +These strings follow the same syntax and evaluation rules as +:ref:`formatted string literals <stdtypes-fstrings>`, +with the following differences: + +* Rather than evaluating to a ``str`` object, template string literals evaluate + to a :class:`string.templatelib.Template` object. + +* The :func:`format` protocol is not used. + Instead, the format specifier and conversions (if any) are passed to + a new :class:`~string.templatelib.Interpolation` object that is created + for each evaluated expression. + It is up to code that processes the resulting :class:`~string.templatelib.Template` + object to decide how to handle format specifiers and conversions. + +* Format specifiers containing nested replacement fields are evaluated eagerly, + prior to being passed to the :class:`~string.templatelib.Interpolation` object. + For instance, an interpolation of the form ``{amount:.{precision}f}`` will + evaluate the inner expression ``{precision}`` to determine the value of the + ``format_spec`` attribute. + If ``precision`` were to be ``2``, the resulting format specifier + would be ``'.2f'``. + +* When the equals sign ``'='`` is provided in an interpolation expression, + the text of the expression is appended to the literal string that precedes + the relevant interpolation.
+ This includes the equals sign and any surrounding whitespace. + The :class:`!Interpolation` instance for the expression will be created as + normal, except that :attr:`~string.templatelib.Interpolation.conversion` will + be set to '``r``' (:func:`repr`) by default. + If an explicit conversion or format specifier is provided, + this will override the default behavior. .. _old-string-formatting: @@ -3195,7 +3266,7 @@ objects. See the :ref:`What's New ` entry for common code patterns which can be optimized with - :func:`bytearray.take_bytes`. + :meth:`bytearray.take_bytes`. Since bytearray objects are sequences of integers (akin to a list), for a bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be @@ -4604,7 +4675,7 @@ copying. .. versionadded:: 3.14 - .. method:: index(value, start=0, stop=sys.maxsize, /) + .. method:: index(value, start=0, stop=sys.maxsize, /) Return the index of the first occurrence of *value* (at or after index *start* and before index *stop*). @@ -4755,11 +4826,12 @@ other sequence-like behavior. There are currently two built-in set types, :class:`set` and :class:`frozenset`. The :class:`set` type is mutable --- the contents can be changed using methods -like :meth:`~set.add` and :meth:`~set.remove`. Since it is mutable, it has no -hash value and cannot be used as either a dictionary key or as an element of -another set. The :class:`frozenset` type is immutable and :term:`hashable` --- -its contents cannot be altered after it is created; it can therefore be used as -a dictionary key or as an element of another set. +like :meth:`~set.add` and :meth:`~set.remove`. +Since it is mutable, it has no hash value and cannot be used as +either a dictionary key or as an element of another set. +The :class:`frozenset` type is immutable and :term:`hashable` --- +its contents cannot be altered after it is created; +it can therefore be used as a dictionary key or as an element of another set. Non-empty sets (not frozensets) can be created by placing a comma-separated list of elements within braces, for example: ``{'jack', 'sjoerd'}``, in addition to the @@ -4776,164 +4848,172 @@ The constructors for both classes work the same: objects. If *iterable* is not specified, a new empty set is returned. - Sets can be created by several means: +Sets can be created by several means: - * Use a comma-separated list of elements within braces: ``{'jack', 'sjoerd'}`` - * Use a set comprehension: ``{c for c in 'abracadabra' if c not in 'abc'}`` - * Use the type constructor: ``set()``, ``set('foobar')``, ``set(['a', 'b', 'foo'])`` +* Use a comma-separated list of elements within braces: ``{'jack', 'sjoerd'}`` +* Use a set comprehension: ``{c for c in 'abracadabra' if c not in 'abc'}`` +* Use the type constructor: ``set()``, ``set('foobar')``, ``set(['a', 'b', 'foo'])`` - Instances of :class:`set` and :class:`frozenset` provide the following - operations: +Instances of :class:`set` and :class:`frozenset` provide the following +operations: - .. describe:: len(s) +.. describe:: len(s) - Return the number of elements in set *s* (cardinality of *s*). + Return the number of elements in set *s* (cardinality of *s*). - .. describe:: x in s +.. describe:: x in s - Test *x* for membership in *s*. + Test *x* for membership in *s*. - .. describe:: x not in s +.. describe:: x not in s - Test *x* for non-membership in *s*. + Test *x* for non-membership in *s*. - .. method:: isdisjoint(other, /) +..
method:: frozenset.isdisjoint(other, /) + set.isdisjoint(other, /) - Return ``True`` if the set has no elements in common with *other*. Sets are - disjoint if and only if their intersection is the empty set. + Return ``True`` if the set has no elements in common with *other*. Sets are + disjoint if and only if their intersection is the empty set. - .. method:: issubset(other, /) - set <= other +.. method:: frozenset.issubset(other, /) + set.issubset(other, /) +.. describe:: set <= other - Test whether every element in the set is in *other*. + Test whether every element in the set is in *other*. - .. method:: set < other +.. describe:: set < other - Test whether the set is a proper subset of *other*, that is, - ``set <= other and set != other``. + Test whether the set is a proper subset of *other*, that is, + ``set <= other and set != other``. - .. method:: issuperset(other, /) - set >= other +.. method:: frozenset.issuperset(other, /) + set.issuperset(other, /) +.. describe:: set >= other - Test whether every element in *other* is in the set. + Test whether every element in *other* is in the set. - .. method:: set > other +.. describe:: set > other - Test whether the set is a proper superset of *other*, that is, ``set >= - other and set != other``. + Test whether the set is a proper superset of *other*, that is, ``set >= + other and set != other``. - .. method:: union(*others) - set | other | ... +.. method:: frozenset.union(*others) + set.union(*others) +.. describe:: set | other | ... - Return a new set with elements from the set and all others. + Return a new set with elements from the set and all others. - .. method:: intersection(*others) - set & other & ... +.. method:: frozenset.intersection(*others) + set.intersection(*others) +.. describe:: set & other & ... - Return a new set with elements common to the set and all others. + Return a new set with elements common to the set and all others. - .. method:: difference(*others) - set - other - ... +.. method:: frozenset.difference(*others) + set.difference(*others) +.. describe:: set - other - ... - Return a new set with elements in the set that are not in the others. + Return a new set with elements in the set that are not in the others. - .. method:: symmetric_difference(other, /) - set ^ other +.. method:: frozenset.symmetric_difference(other, /) + set.symmetric_difference(other, /) +.. describe:: set ^ other - Return a new set with elements in either the set or *other* but not both. + Return a new set with elements in either the set or *other* but not both. - .. method:: copy() +.. method:: frozenset.copy() + set.copy() - Return a shallow copy of the set. + Return a shallow copy of the set. - Note, the non-operator versions of :meth:`union`, :meth:`intersection`, - :meth:`difference`, :meth:`symmetric_difference`, :meth:`issubset`, and - :meth:`issuperset` methods will accept any iterable as an argument. In - contrast, their operator based counterparts require their arguments to be - sets. This precludes error-prone constructions like ``set('abc') & 'cbs'`` - in favor of the more readable ``set('abc').intersection('cbs')``. +Note, the non-operator versions of :meth:`~frozenset.union`, +:meth:`~frozenset.intersection`, :meth:`~frozenset.difference`, :meth:`~frozenset.symmetric_difference`, :meth:`~frozenset.issubset`, and +:meth:`~frozenset.issuperset` methods will accept any iterable as an argument. In +contrast, their operator based counterparts require their arguments to be +sets. 
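For example, a quick sketch of the difference (``sorted`` is used here only to make the output deterministic, since sets are unordered)::

    >>> sorted(set('abc').intersection('cbs'))
    ['b', 'c']
    >>> set('abc') & 'cbs'
    Traceback (most recent call last):
      ...
    TypeError: unsupported operand type(s) for &: 'set' and 'str'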
This precludes error-prone constructions like ``set('abc') & 'cbs'`` +in favor of the more readable ``set('abc').intersection('cbs')``. - Both :class:`set` and :class:`frozenset` support set to set comparisons. Two - sets are equal if and only if every element of each set is contained in the - other (each is a subset of the other). A set is less than another set if and - only if the first set is a proper subset of the second set (is a subset, but - is not equal). A set is greater than another set if and only if the first set - is a proper superset of the second set (is a superset, but is not equal). +Both :class:`set` and :class:`frozenset` support set to set comparisons. Two +sets are equal if and only if every element of each set is contained in the +other (each is a subset of the other). A set is less than another set if and +only if the first set is a proper subset of the second set (is a subset, but +is not equal). A set is greater than another set if and only if the first set +is a proper superset of the second set (is a superset, but is not equal). - Instances of :class:`set` are compared to instances of :class:`frozenset` - based on their members. For example, ``set('abc') == frozenset('abc')`` - returns ``True`` and so does ``set('abc') in set([frozenset('abc')])``. +Instances of :class:`set` are compared to instances of :class:`frozenset` +based on their members. For example, ``set('abc') == frozenset('abc')`` +returns ``True`` and so does ``set('abc') in set([frozenset('abc')])``. - The subset and equality comparisons do not generalize to a total ordering - function. For example, any two nonempty disjoint sets are not equal and are not - subsets of each other, so *all* of the following return ``False``: ``a<b``, ``a==b``, or ``a>b``. +The subset and equality comparisons do not generalize to a total ordering +function. For example, any two nonempty disjoint sets are not equal and are not +subsets of each other, so *all* of the following return ``False``: ``a<b``, ``a==b``, or ``a>b``. - Since sets only define partial ordering (subset relationships), the output of - the :meth:`list.sort` method is undefined for lists of sets. +Since sets only define partial ordering (subset relationships), the output of +the :meth:`list.sort` method is undefined for lists of sets. - Set elements, like dictionary keys, must be :term:`hashable`. +Set elements, like dictionary keys, must be :term:`hashable`. - Binary operations that mix :class:`set` instances with :class:`frozenset` - return the type of the first operand. For example: ``frozenset('ab') | - set('bc')`` returns an instance of :class:`frozenset`. +Binary operations that mix :class:`set` instances with :class:`frozenset` +return the type of the first operand. For example: ``frozenset('ab') | +set('bc')`` returns an instance of :class:`frozenset`. - The following table lists operations available for :class:`set` that do not - apply to immutable instances of :class:`frozenset`: +The following table lists operations available for :class:`set` that do not +apply to immutable instances of :class:`frozenset`: - .. method:: update(*others) - set |= other | ... +.. method:: set.update(*others) +.. describe:: set |= other | ... - Update the set, adding elements from all others. + Update the set, adding elements from all others. - .. method:: intersection_update(*others) - set &= other & ... +.. method:: set.intersection_update(*others) +.. describe:: set &= other & ... - Update the set, keeping only elements found in it and all others.
+ Update the set, keeping only elements found in it and all others. - .. method:: difference_update(*others) - set -= other | ... +.. method:: set.difference_update(*others) +.. describe:: set -= other | ... - Update the set, removing elements found in others. + Update the set, removing elements found in others. - .. method:: symmetric_difference_update(other, /) - set ^= other +.. method:: set.symmetric_difference_update(other, /) +.. describe:: set ^= other - Update the set, keeping only elements found in either set, but not in both. + Update the set, keeping only elements found in either set, but not in both. - .. method:: add(elem, /) +.. method:: set.add(elem, /) - Add element *elem* to the set. + Add element *elem* to the set. - .. method:: remove(elem, /) +.. method:: set.remove(elem, /) - Remove element *elem* from the set. Raises :exc:`KeyError` if *elem* is - not contained in the set. + Remove element *elem* from the set. Raises :exc:`KeyError` if *elem* is + not contained in the set. - .. method:: discard(elem, /) +.. method:: set.discard(elem, /) - Remove element *elem* from the set if it is present. + Remove element *elem* from the set if it is present. - .. method:: pop() +.. method:: set.pop() - Remove and return an arbitrary element from the set. Raises - :exc:`KeyError` if the set is empty. + Remove and return an arbitrary element from the set. Raises + :exc:`KeyError` if the set is empty. - .. method:: clear() +.. method:: set.clear() - Remove all elements from the set. + Remove all elements from the set. - Note, the non-operator versions of the :meth:`update`, - :meth:`intersection_update`, :meth:`difference_update`, and - :meth:`symmetric_difference_update` methods will accept any iterable as an - argument. +Note, the non-operator versions of the :meth:`~set.update`, +:meth:`~set.intersection_update`, :meth:`~set.difference_update`, and +:meth:`~set.symmetric_difference_update` methods will accept any iterable as an +argument. - Note, the *elem* argument to the :meth:`~object.__contains__`, - :meth:`remove`, and - :meth:`discard` methods may be a set. To support searching for an equivalent - frozenset, a temporary one is created from *elem*. +Note, the *elem* argument to the :meth:`~object.__contains__`, +:meth:`~set.remove`, and +:meth:`~set.discard` methods may be a set. To support searching for an equivalent +frozenset, a temporary one is created from *elem*. .. _typesmapping: diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 1aade881745..b8dfcc31077 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -831,7 +831,9 @@ Instances of the :class:`Popen` class have the following methods: If the process does not terminate after *timeout* seconds, a :exc:`TimeoutExpired` exception will be raised. Catching this exception and - retrying communication will not lose any output. + retrying communication will not lose any output. Supplying *input* to a + subsequent post-timeout :meth:`communicate` call is undefined behavior + and may become an error in the future. The child process is not killed if the timeout expires, so in order to cleanup properly a well-behaved application should kill the child process and @@ -844,6 +846,11 @@ Instances of the :class:`Popen` class have the following methods: proc.kill() outs, errs = proc.communicate() + After a call to :meth:`~Popen.communicate` raises :exc:`TimeoutExpired`, do + not call :meth:`~Popen.wait`.
Use an additional :meth:`~Popen.communicate` + call to finish handling pipes and populate the :attr:`~Popen.returncode` + attribute. + .. note:: The data read is buffered in memory, so do not use this method if the data diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index fe45becce2e..0bc0a953fd9 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -438,7 +438,7 @@ run whether the test method succeeded or not. Such a working environment for the testing code is called a :dfn:`test fixture`. A new TestCase instance is created as a unique test fixture used to execute each individual test method. Thus -:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`~TestCase.__init__` +:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`!TestCase.__init__` will be called once per test. It is recommended that you use TestCase implementations to group tests together @@ -518,7 +518,7 @@ set-up and tear-down methods:: subclasses will make future test refactorings infinitely easier. In some cases, the existing tests may have been written using the :mod:`doctest` -module. If so, :mod:`doctest` provides a :class:`DocTestSuite` class that can +module. If so, :mod:`doctest` provides a :class:`~doctest.DocTestSuite` class that can automatically build :class:`unittest.TestSuite` instances from the existing :mod:`doctest`\ -based tests. @@ -1023,7 +1023,7 @@ Test cases additional keyword argument *msg*. The context manager will store the caught exception object in its - :attr:`exception` attribute. This can be useful if the intention + :attr:`!exception` attribute. This can be useful if the intention is to perform additional checks on the exception raised:: with self.assertRaises(SomeException) as cm: @@ -1036,7 +1036,7 @@ Test cases Added the ability to use :meth:`assertRaises` as a context manager. .. versionchanged:: 3.2 - Added the :attr:`exception` attribute. + Added the :attr:`!exception` attribute. .. versionchanged:: 3.3 Added the *msg* keyword argument when used as a context manager. @@ -1089,8 +1089,8 @@ Test cases additional keyword argument *msg*. The context manager will store the caught warning object in its - :attr:`warning` attribute, and the source line which triggered the - warnings in the :attr:`filename` and :attr:`lineno` attributes. + :attr:`!warning` attribute, and the source line which triggered the + warnings in the :attr:`!filename` and :attr:`!lineno` attributes. This can be useful if the intention is to perform additional checks on the warning caught:: @@ -1437,7 +1437,7 @@ Test cases that lists the differences between the sets. This method is used by default when comparing sets or frozensets with :meth:`assertEqual`. - Fails if either of *first* or *second* does not have a :meth:`set.difference` + Fails if either of *first* or *second* does not have a :meth:`~frozenset.difference` method. .. versionadded:: 3.1 @@ -1645,7 +1645,7 @@ Test cases .. method:: asyncSetUp() :async: - Method called to prepare the test fixture. This is called after :meth:`setUp`. + Method called to prepare the test fixture. This is called after :meth:`TestCase.setUp`. This is called immediately before calling the test method; other than :exc:`AssertionError` or :exc:`SkipTest`, any exception raised by this method will be considered an error rather than a test failure. The default implementation @@ -1655,7 +1655,7 @@ Test cases :async: Method called immediately after the test method has been called and the - result recorded. 
This is called before :meth:`tearDown`. This is called even if + result recorded. This is called before :meth:`~TestCase.tearDown`. This is called even if the test method raised an exception, so the implementation in subclasses may need to be particularly careful about checking internal state. Any exception, other than :exc:`AssertionError` or :exc:`SkipTest`, raised by this method will be @@ -1684,7 +1684,7 @@ Test cases Sets up a new event loop to run the test, collecting the result into the :class:`TestResult` object passed as *result*. If *result* is omitted or ``None``, a temporary result object is created (by calling - the :meth:`defaultTestResult` method) and used. The result object is + the :meth:`~TestCase.defaultTestResult` method) and used. The result object is returned to :meth:`run`'s caller. At the end of the test all the tasks in the event loop are cancelled. @@ -1805,7 +1805,7 @@ Grouping tests returned by repeated iterations before :meth:`TestSuite.run` must be the same for each call iteration. After :meth:`TestSuite.run`, callers should not rely on the tests returned by this method unless the caller uses a - subclass that overrides :meth:`TestSuite._removeTestAtIndex` to preserve + subclass that overrides :meth:`!TestSuite._removeTestAtIndex` to preserve test references. .. versionchanged:: 3.2 @@ -1816,10 +1816,10 @@ Grouping tests .. versionchanged:: 3.4 In earlier versions the :class:`TestSuite` held references to each :class:`TestCase` after :meth:`TestSuite.run`. Subclasses can restore - that behavior by overriding :meth:`TestSuite._removeTestAtIndex`. + that behavior by overriding :meth:`!TestSuite._removeTestAtIndex`. In the typical usage of a :class:`TestSuite` object, the :meth:`run` method - is invoked by a :class:`TestRunner` rather than by the end-user test harness. + is invoked by a :class:`!TestRunner` rather than by the end-user test harness. Loading and running tests @@ -1853,12 +1853,12 @@ Loading and running tests .. method:: loadTestsFromTestCase(testCaseClass) Return a suite of all test cases contained in the :class:`TestCase`\ -derived - :class:`testCaseClass`. + :class:`!testCaseClass`. A test case instance is created for each method named by :meth:`getTestCaseNames`. By default these are the method names beginning with ``test``. If :meth:`getTestCaseNames` returns no - methods, but the :meth:`runTest` method is implemented, a single test + methods, but the :meth:`!runTest` method is implemented, a single test case is created for that method instead. @@ -1905,13 +1905,13 @@ Loading and running tests case class will be picked up as "a test method within a test case class", rather than "a callable object". - For example, if you have a module :mod:`SampleTests` containing a - :class:`TestCase`\ -derived class :class:`SampleTestCase` with three test - methods (:meth:`test_one`, :meth:`test_two`, and :meth:`test_three`), the + For example, if you have a module :mod:`!SampleTests` containing a + :class:`TestCase`\ -derived class :class:`!SampleTestCase` with three test + methods (:meth:`!test_one`, :meth:`!test_two`, and :meth:`!test_three`), the specifier ``'SampleTests.SampleTestCase'`` would cause this method to return a suite which will run all three test methods. Using the specifier ``'SampleTests.SampleTestCase.test_two'`` would cause it to return a test - suite which will run only the :meth:`test_two` test method. The specifier + suite which will run only the :meth:`!test_two` test method. 
The specifier can refer to modules and packages which have not been imported; they will be imported as a side-effect. @@ -2058,7 +2058,7 @@ Loading and running tests Testing frameworks built on top of :mod:`unittest` may want access to the :class:`TestResult` object generated by running a set of tests for reporting purposes; a :class:`TestResult` instance is returned by the - :meth:`TestRunner.run` method for this purpose. + :meth:`!TestRunner.run` method for this purpose. :class:`TestResult` instances have the following attributes that will be of interest when inspecting the results of running a set of tests: @@ -2144,12 +2144,12 @@ Loading and running tests This method can be called to signal that the set of tests being run should be aborted by setting the :attr:`shouldStop` attribute to ``True``. - :class:`TestRunner` objects should respect this flag and return without + :class:`!TestRunner` objects should respect this flag and return without running any additional tests. For example, this feature is used by the :class:`TextTestRunner` class to stop the test framework when the user signals an interrupt from the - keyboard. Interactive tools which provide :class:`TestRunner` + keyboard. Interactive tools which provide :class:`!TestRunner` implementations can use this in a similar manner. The following methods of the :class:`TestResult` class are used to maintain @@ -2469,9 +2469,9 @@ Class and Module Fixtures ------------------------- Class and module level fixtures are implemented in :class:`TestSuite`. When -the test suite encounters a test from a new class then :meth:`tearDownClass` -from the previous class (if there is one) is called, followed by -:meth:`setUpClass` from the new class. +the test suite encounters a test from a new class then +:meth:`~TestCase.tearDownClass` from the previous class (if there is one) +is called, followed by :meth:`~TestCase.setUpClass` from the new class. Similarly if a test is from a different module from the previous test then ``tearDownModule`` from the previous module is run, followed by diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 8bceeecd463..a21cfaa4645 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -74,7 +74,7 @@ given point) or to make use of the :func:`DOMEventStream.expandNode` method and switch to DOM-related processing. -.. class:: PullDom(documentFactory=None) +.. class:: PullDOM(documentFactory=None) Subclass of :class:`xml.sax.handler.ContentHandler`. diff --git a/Doc/library/xml.sax.handler.rst b/Doc/library/xml.sax.handler.rst index 38ca4507d81..f1af7253e43 100644 --- a/Doc/library/xml.sax.handler.rst +++ b/Doc/library/xml.sax.handler.rst @@ -96,6 +96,14 @@ for the feature and property names. .. data:: feature_external_ges + .. warning:: + + Enabling opens a vulnerability to + `external entity attacks `_ + if the parser is used with user-provided XML content. + Please reflect on your `threat model `_ + before enabling this feature. + | value: ``"http://xml.org/sax/features/external-general-entities"`` | true: Include all external general (text) entities. | false: Do not include external general entities. diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index e4912629aac..7e511237a6a 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -472,7 +472,7 @@ remote server into a single request [#]_. Create an object used to boxcar method calls. *server* is the eventual target of the call. 
Calls can be made to the result object, but they will immediately - return ``None``, and only store the call name and parameters in the + return ``None``, and only store the call name and arguments in the :class:`MultiCall` object. Calling the object itself causes all stored calls to be transmitted as a single ``system.multicall`` request. The result of this call is a :term:`generator`; iterating over this generator yields the individual results. diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 5a8bbc8c1ae..ae4e25b13b9 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -16,10 +16,10 @@ provides tools to create, read, write, append, and list a ZIP file. Any advanced use of this module will require an understanding of the format, as defined in `PKZIP Application Note`_. -This module does not currently handle multi-disk ZIP files. +This module does not handle multipart ZIP files. It can handle ZIP files that use the ZIP64 extensions (that is ZIP files that are more than 4 GiB in size). It supports -decryption of encrypted files in ZIP archives, but it currently cannot +decryption of encrypted files in ZIP archives, but it cannot create an encrypted file. Decryption is extremely slow as it is implemented in native Python rather than C. @@ -175,7 +175,7 @@ The module defines the following items: .. _zipfile-objects: -ZipFile Objects +ZipFile objects --------------- @@ -248,7 +248,7 @@ ZipFile Objects .. note:: *metadata_encoding* is an instance-wide setting for the ZipFile. - It is not currently possible to set this on a per-member basis. + It is not possible to set this on a per-member basis. This attribute is a workaround for legacy implementations which produce archives with names in the current locale encoding or code page (mostly @@ -571,7 +571,7 @@ The following data attributes are also available: .. _path-objects: -Path Objects +Path objects ------------ .. class:: Path(root, at='') @@ -707,7 +707,7 @@ changes. .. _pyzipfile-objects: -PyZipFile Objects +PyZipFile objects ----------------- The :class:`PyZipFile` constructor takes the same parameters as the @@ -784,7 +784,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the .. _zipinfo-objects: -ZipInfo Objects +ZipInfo objects --------------- Instances of the :class:`ZipInfo` class are returned by the :meth:`.getinfo` and @@ -954,7 +954,7 @@ Instances have the following methods and attributes: .. _zipfile-commandline: .. program:: zipfile -Command-Line Interface +Command-line interface ---------------------- The :mod:`zipfile` module provides a simple command-line interface to interact @@ -1029,7 +1029,7 @@ From file itself Decompression may fail due to incorrect password / CRC checksum / ZIP format or unsupported compression method / decryption. -File System limitations +File system limitations ~~~~~~~~~~~~~~~~~~~~~~~ Exceeding limitations on different file systems can cause decompression to fail. diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 882b05e8731..5f79c6fe8f5 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -449,7 +449,7 @@ Sets These represent a mutable set. They are created by the built-in :func:`set` constructor and can be modified afterwards by several methods, such as - :meth:`add `. + :meth:`~set.add`. Frozen sets @@ -2630,8 +2630,8 @@ Notes on using *__slots__*: descriptor directly from the base class). This renders the meaning of the program undefined. In the future, a check may be added to prevent this.
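The *__slots__* rule described in the next note can be sketched as follows (the class names are hypothetical, the exact error message may differ between versions, and the ``__dict__`` case assumes the relaxed behavior noted in the versionchanged entry below)::

    class Counted(int):
        __slots__ = ('count',)      # raises TypeError: nonempty __slots__
                                    # not supported for subtype of 'int'

    class Flexible(int):
        __slots__ = ('__dict__',)   # allowed: *__dict__* and *__weakref__*
                                    # may appear in *__slots__*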
-* :exc:`TypeError` will be raised if nonempty *__slots__* are defined for a - class derived from a +* :exc:`TypeError` will be raised if *__slots__* other than *__dict__* and + *__weakref__* are defined for a class derived from a :c:member:`"variable-length" built-in type ` such as :class:`int`, :class:`bytes`, and :class:`tuple`. @@ -2656,6 +2656,10 @@ Notes on using *__slots__*: of the iterator's values. However, the *__slots__* attribute will be an empty iterator. +.. versionchanged:: next + Allowed defining the *__dict__* and *__weakref__* *__slots__* for any class. + + .. _class-customization: Customizing class creation diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index c655d6c52ec..165dfa69f88 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -174,7 +174,7 @@ Formally: .. grammar-snippet:: :group: python-grammar - strings: ( `STRING` | fstring)+ | tstring+ + strings: ( `STRING` | `fstring`)+ | `tstring`+ This feature is defined at the syntactical level, so it only works with literals. To concatenate string expressions at run time, the '+' operator may be used:: diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ea386706e8b..9322d8571f7 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -345,7 +345,15 @@ Whitespace between tokens Except at the beginning of a logical line or in string literals, the whitespace characters space, tab and formfeed can be used interchangeably to separate -tokens. Whitespace is needed between two tokens only if their concatenation +tokens: + +.. grammar-snippet:: + :group: python-grammar + + whitespace: ' ' | tab | formfeed + + +Whitespace is needed between two tokens only if their concatenation could otherwise be interpreted as a different token. For example, ``ab`` is one token, but ``a b`` is two tokens. However, ``+a`` and ``+ a`` both produce two tokens, ``+`` and ``a``, as ``+a`` is not a valid token. @@ -386,73 +394,29 @@ Names (identifiers and keywords) :data:`~token.NAME` tokens represent *identifiers*, *keywords*, and *soft keywords*. -Within the ASCII range (U+0001..U+007F), the valid characters for names -include the uppercase and lowercase letters (``A-Z`` and ``a-z``), -the underscore ``_`` and, except for the first character, the digits -``0`` through ``9``. +Names are composed of the following characters: + +* uppercase and lowercase letters (``A-Z`` and ``a-z``), +* the underscore (``_``), +* digits (``0`` through ``9``), which cannot appear as the first character, and +* non-ASCII characters. Valid names may only contain "letter-like" and + "digit-like" characters; see :ref:`lexical-names-nonascii` for details. Names must contain at least one character, but have no upper length limit. Case is significant. -Besides ``A-Z``, ``a-z``, ``_`` and ``0-9``, names can also use "letter-like" -and "number-like" characters from outside the ASCII range, as detailed below. - -All identifiers are converted into the `normalization form`_ NFKC while -parsing; comparison of identifiers is based on NFKC. 
- -Formally, the first character of a normalized identifier must belong to the -set ``id_start``, which is the union of: - -* Unicode category ``<Lu>`` - uppercase letters (includes ``A`` to ``Z``) -* Unicode category ``<Ll>`` - lowercase letters (includes ``a`` to ``z``) -* Unicode category ``<Lt>`` - titlecase letters -* Unicode category ``<Lm>`` - modifier letters -* Unicode category ``<Lo>`` - other letters -* Unicode category ``<Nl>`` - letter numbers -* {``"_"``} - the underscore -* ``<Other_ID_Start>`` - an explicit set of characters in `PropList.txt`_ - to support backwards compatibility - -The remaining characters must belong to the set ``id_continue``, which is the -union of: - -* all characters in ``id_start`` -* Unicode category ``<Nd>`` - decimal numbers (includes ``0`` to ``9``) -* Unicode category ``<Pc>`` - connector punctuations -* Unicode category ``<Mn>`` - nonspacing marks -* Unicode category ``<Mc>`` - spacing combining marks -* ``<Other_ID_Continue>`` - another explicit set of characters in - `PropList.txt`_ to support backwards compatibility - -Unicode categories use the version of the Unicode Character Database as -included in the :mod:`unicodedata` module. - -These sets are based on the Unicode standard annex `UAX-31`_. -See also :pep:`3131` for further details. - -Even more formally, names are described by the following lexical definitions: +Formally, names are described by the following lexical definitions: .. grammar-snippet:: :group: python-grammar - NAME: `xid_start` `xid_continue`* - id_start: <Lu> | <Ll> | <Lt> | <Lm> | <Lo> | <Nl> | "_" | <Other_ID_Start> - id_continue: `id_start` | <Nd> | <Pc> | <Mn> | <Mc> | <Other_ID_Continue> - xid_start: <all characters in `id_start` whose NFKC normalization is in "id_start xid_continue*"> - xid_continue: <all characters in `id_continue` whose NFKC normalization is in "id_continue*"> - identifier: <`NAME`, except keywords> + NAME: `name_start` `name_continue`* + name_start: "a"..."z" | "A"..."Z" | "_" | <any non-ASCII character> + name_continue: name_start | "0"..."9" + identifier: <`NAME`, except keywords> -A non-normative listing of all valid identifier characters as defined by -Unicode is available in the `DerivedCoreProperties.txt`_ file in the Unicode -Character Database. - - -.. _UAX-31: https://www.unicode.org/reports/tr31/ -.. _PropList.txt: https://www.unicode.org/Public/17.0.0/ucd/PropList.txt -.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt -.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms +Note that not all names matched by this grammar are valid; see +:ref:`lexical-names-nonascii` for details. .. _keywords: @@ -555,6 +519,95 @@ characters: :ref:`atom-identifiers`. +.. _lexical-names-nonascii: + +Non-ASCII characters in names +----------------------------- + +Names that contain non-ASCII characters need additional normalization +and validation beyond the rules and grammar explained +:ref:`above `. +For example, ``ř_1``, ``蛇``, or ``साँप`` are valid names, but ``r〰2``, +``€``, or ``🐍`` are not. + +This section explains the exact rules. + +All names are converted into the `normalization form`_ NFKC while parsing. +This means that some typographic variants of characters are +converted to their "basic" form. For example, ``fiⁿₐˡᵢᶻₐᵗᵢᵒₙ`` normalizes to +``finalization``, so Python treats them as the same name:: + + >>> fiⁿₐˡᵢᶻₐᵗᵢᵒₙ = 3 + >>> finalization + 3 + +.. note:: + + Normalization is done at the lexical level only. + Run-time functions that take names as *strings* generally do not normalize + their arguments. + For example, the variable defined above is accessible at run time in the + :func:`globals` dictionary as ``globals()["finalization"]`` but not + ``globals()["fiⁿₐˡᵢᶻₐᵗᵢᵒₙ"]``.
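(Editor's aside: a small, runnable illustration of the normalization note above, using only the standard library; run-time code can apply the same NFKC normalization explicitly with :mod:`unicodedata`, and :meth:`str.isidentifier` checks validity against these rules.)::

    import unicodedata

    fiⁿₐˡᵢᶻₐᵗᵢᵒₙ = 3   # the parser normalizes this name to "finalization"

    key = unicodedata.normalize('NFKC', 'fiⁿₐˡᵢᶻₐᵗᵢᵒₙ')
    assert key == 'finalization'
    assert globals()[key] == 3       # run-time lookup needs the normalized form

    assert 'ř_1'.isidentifier() and '蛇'.isidentifier()
    assert not '€'.isidentifier() and not '🐍'.isidentifier()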
+ +Similarly to how ASCII-only names must contain only letters, digits and +the underscore, and cannot start with a digit, a valid name must +start with a character in the "letter-like" set ``xid_start``, +and the remaining characters must be in the "letter- and digit-like" set +``xid_continue``. + +These sets are based on the *XID_Start* and *XID_Continue* sets as defined by the +Unicode standard annex `UAX-31`_. +Python's ``xid_start`` additionally includes the underscore (``_``). +Note that Python does not necessarily conform to `UAX-31`_. + +A non-normative listing of characters in the *XID_Start* and *XID_Continue* +sets as defined by Unicode is available in the `DerivedCoreProperties.txt`_ +file in the Unicode Character Database. +For reference, the construction rules for the ``xid_*`` sets are given below. + +The set ``id_start`` is defined as the union of: + +* Unicode category ``<Lu>`` - uppercase letters (includes ``A`` to ``Z``) +* Unicode category ``<Ll>`` - lowercase letters (includes ``a`` to ``z``) +* Unicode category ``<Lt>`` - titlecase letters +* Unicode category ``<Lm>`` - modifier letters +* Unicode category ``<Lo>`` - other letters +* Unicode category ``<Nl>`` - letter numbers +* {``"_"``} - the underscore +* ``<Other_ID_Start>`` - an explicit set of characters in `PropList.txt`_ + to support backwards compatibility + +The set ``xid_start`` then closes this set under NFKC normalization, by +removing all characters whose normalization is not of the form +``id_start id_continue*``. + +The set ``id_continue`` is defined as the union of: + +* ``id_start`` (see above) +* Unicode category ``<Nd>`` - decimal numbers (includes ``0`` to ``9``) +* Unicode category ``<Pc>`` - connector punctuations +* Unicode category ``<Mn>`` - nonspacing marks +* Unicode category ``<Mc>`` - spacing combining marks +* ``<Other_ID_Continue>`` - another explicit set of characters in + `PropList.txt`_ to support backwards compatibility + +Again, ``xid_continue`` closes this set under NFKC normalization. + +Unicode categories use the version of the Unicode Character Database as +included in the :mod:`unicodedata` module. + +.. _UAX-31: https://www.unicode.org/reports/tr31/ +.. _PropList.txt: https://www.unicode.org/Public/17.0.0/ucd/PropList.txt +.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms + +.. seealso:: + + * :pep:`3131` -- Supporting Non-ASCII Identifiers + * :pep:`672` -- Unicode-related Security Considerations for Python + + .. _literals: Literals @@ -987,124 +1040,59 @@ f-strings --------- .. versionadded:: 3.6 +.. versionchanged:: 3.7 + The :keyword:`await` keyword and the :keyword:`async for` clause can be used + in expressions within f-strings. +.. versionchanged:: 3.8 + Added the debug specifier (``=``). +.. versionchanged:: 3.12 + Many restrictions on expressions within f-strings have been removed. + Notably, nested strings, comments, and backslashes are now permitted. A :dfn:`formatted string literal` or :dfn:`f-string` is a string literal -that is prefixed with '``f``' or '``F``'. These strings may contain -replacement fields, which are expressions delimited by curly braces ``{}``. -While other string literals always have a constant value, formatted strings -are really expressions evaluated at run time. +that is prefixed with '``f``' or '``F``'. +Unlike other string literals, f-strings do not have a constant value. +They may contain *replacement fields* delimited by curly braces ``{}``.
+Replacement fields contain expressions which are evaluated at run time. +For example:: -Escape sequences are decoded like in ordinary string literals (except when -a literal is also marked as a raw string). After decoding, the grammar -for the contents of the string is: + >>> who = 'nobody' + >>> nationality = 'Spanish' + >>> f'{who.title()} expects the {nationality} Inquisition!' + 'Nobody expects the Spanish Inquisition!' -.. productionlist:: python-grammar - f_string: (`literal_char` | "{{" | "}}" | `replacement_field`)* - replacement_field: "{" `f_expression` ["="] ["!" `conversion`] [":" `format_spec`] "}" - f_expression: (`conditional_expression` | "*" `or_expr`) - : ("," `conditional_expression` | "," "*" `or_expr`)* [","] - : | `yield_expression` - conversion: "s" | "r" | "a" - format_spec: (`literal_char` | `replacement_field`)* - literal_char: <any code point except "{", "}" or NULL> +Any doubled curly braces (``{{`` or ``}}``) outside replacement fields +are replaced with the corresponding single curly brace:: -The parts of the string outside curly braces are treated literally, -except that any doubled curly braces ``'{{'`` or ``'}}'`` are replaced -with the corresponding single curly brace. A single opening curly -bracket ``'{'`` marks a replacement field, which starts with a -Python expression. To display both the expression text and its value after -evaluation, (useful in debugging), an equal sign ``'='`` may be added after the -expression. A conversion field, introduced by an exclamation point ``'!'`` may -follow. A format specifier may also be appended, introduced by a colon ``':'``. -A replacement field ends with a closing curly bracket ``'}'``. + >>> print(f'{{...}}') + {...} + +Other characters outside replacement fields are treated as in ordinary +string literals. +This means that escape sequences are decoded (except when a literal is +also marked as a raw string), and newlines are possible in triple-quoted +f-strings:: + + >>> name = 'Galahad' + >>> favorite_color = 'blue' + >>> print(f'{name}:\t{favorite_color}') + Galahad: blue + >>> print(rf"C:\Users\{name}") + C:\Users\Galahad + >>> print(f'''Three shall be the number of the counting + ... and the number of the counting shall be three.''') + Three shall be the number of the counting + and the number of the counting shall be three. Expressions in formatted string literals are treated like regular -Python expressions surrounded by parentheses, with a few exceptions. -An empty expression is not allowed, and both :keyword:`lambda` and -assignment expressions ``:=`` must be surrounded by explicit parentheses. +Python expressions. Each expression is evaluated in the context where the formatted string literal -appears, in order from left to right. Replacement expressions can contain -newlines in both single-quoted and triple-quoted f-strings and they can contain -comments. Everything that comes after a ``#`` inside a replacement field -is a comment (even closing braces and quotes). In that case, replacement fields -must be closed in a different line. - -.. code-block:: text - - >>> f"abc{a # This is a comment }" - ... + 3}" - 'abc5' - -.. versionchanged:: 3.7 - Prior to Python 3.7, an :keyword:`await` expression and comprehensions - containing an :keyword:`async for` clause were illegal in the expressions - in formatted string literals due to a problem with the implementation. - -.. versionchanged:: 3.12 - Prior to Python 3.12, comments were not allowed inside f-string replacement - fields.
- -When the equal sign ``'='`` is provided, the output will have the expression -text, the ``'='`` and the evaluated value. Spaces after the opening brace -``'{'``, within the expression and after the ``'='`` are all retained in the -output. By default, the ``'='`` causes the :func:`repr` of the expression to be -provided, unless there is a format specified. When a format is specified it -defaults to the :func:`str` of the expression unless a conversion ``'!r'`` is -declared. - -.. versionadded:: 3.8 - The equal sign ``'='``. - -If a conversion is specified, the result of evaluating the expression -is converted before formatting. Conversion ``'!s'`` calls :func:`str` on -the result, ``'!r'`` calls :func:`repr`, and ``'!a'`` calls :func:`ascii`. - -The result is then formatted using the :func:`format` protocol. The -format specifier is passed to the :meth:`~object.__format__` method of the -expression or conversion result. An empty string is passed when the -format specifier is omitted. The formatted result is then included in -the final value of the whole string. - -Top-level format specifiers may include nested replacement fields. These nested -fields may include their own conversion fields and :ref:`format specifiers -`, but may not include more deeply nested replacement fields. The -:ref:`format specifier mini-language ` is the same as that used by -the :meth:`str.format` method. - -Formatted string literals may be concatenated, but replacement fields -cannot be split across literals. - -Some examples of formatted string literals:: - - >>> name = "Fred" - >>> f"He said his name is {name!r}." - "He said his name is 'Fred'." - >>> f"He said his name is {repr(name)}." # repr() is equivalent to !r - "He said his name is 'Fred'." - >>> width = 10 - >>> precision = 4 - >>> value = decimal.Decimal("12.34567") - >>> f"result: {value:{width}.{precision}}" # nested fields - 'result: 12.35' - >>> today = datetime(year=2017, month=1, day=27) - >>> f"{today:%B %d, %Y}" # using date format specifier - 'January 27, 2017' - >>> f"{today=:%B %d, %Y}" # using date format specifier and debugging - 'today=January 27, 2017' - >>> number = 1024 - >>> f"{number:#0x}" # using integer format specifier - '0x400' - >>> foo = "bar" - >>> f"{ foo = }" # preserves whitespace - " foo = 'bar'" - >>> line = "The mill's closed" - >>> f"{line = }" - 'line = "The mill\'s closed"' - >>> f"{line = :20}" - "line = The mill's closed " - >>> f"{line = !r:20}" - 'line = "The mill\'s closed" ' +appears, in order from left to right. +An empty expression is not allowed, and both :keyword:`lambda` and +assignment expressions ``:=`` must be surrounded by explicit parentheses:: + >>> f'{(half := 1/2)}, {half * 42}' + '0.5, 21.0' Reusing the outer f-string quoting type inside a replacement field is permitted:: @@ -1113,10 +1101,6 @@ permitted:: >>> f"abc {a["x"]} def" 'abc 2 def' -.. versionchanged:: 3.12 - Prior to Python 3.12, reuse of the same quoting type of the outer f-string - inside a replacement field was not possible. - Backslashes are also allowed in replacement fields and are evaluated the same way as in any other context:: @@ -1127,23 +1111,84 @@ way as in any other context:: b c -.. versionchanged:: 3.12 - Prior to Python 3.12, backslashes were not permitted inside an f-string - replacement field. +It is possible to nest f-strings:: -Formatted string literals cannot be used as docstrings, even if they do not -include expressions. 
+ >>> name = 'world' + >>> f'Repeated:{f' hello {name}' * 3}' + 'Repeated: hello world hello world hello world' -:: +Portable Python programs should not use more than 5 levels of nesting. + +.. impl-detail:: + + CPython does not limit nesting of f-strings. + +Replacement expressions can contain newlines in both single-quoted and +triple-quoted f-strings and they can contain comments. +Everything that comes after a ``#`` inside a replacement field +is a comment (even closing braces and quotes). +This means that replacement fields with comments must be closed on a +different line: + +.. code-block:: text + + >>> a = 2 + >>> f"abc{a # This comment }" continues until the end of the line + ... + 3}" + 'abc5' + +After the expression, replacement fields may optionally contain: + +* a *debug specifier* -- an equal sign (``=``), optionally surrounded by + whitespace on one or both sides; +* a *conversion specifier* -- ``!s``, ``!r`` or ``!a``; and/or +* a *format specifier* prefixed with a colon (``:``). + +See the :ref:`Standard Library section on f-strings ` +for details on how these fields are evaluated. + +As that section explains, *format specifiers* are passed as the second argument +to the :func:`format` function to format a replacement field value. +For example, they can be used to specify a field width and padding characters +using the :ref:`Format Specification Mini-Language <formatspec>`:: + + >>> number = 14.3 + >>> f'{number:20.7f}' + '          14.3000000' + +Top-level format specifiers may include nested replacement fields:: + + >>> field_size = 20 + >>> precision = 7 + >>> f'{number:{field_size}.{precision}f}' + '          14.3000000' + +These nested fields may include their own conversion fields and +:ref:`format specifiers <formatspec>`:: + + >>> number = 3 + >>> f'{number:{field_size}}' + '                   3' + >>> f'{number:{field_size:05}}' + '00000000000000000003' + +However, these nested fields may not include more deeply nested replacement +fields. + +Formatted string literals cannot be used as :term:`docstrings <docstring>`, +even if they do not include expressions:: >>> def foo(): ... f"Not a docstring" ... - >>> foo.__doc__ is None - True + >>> print(foo.__doc__) + None -See also :pep:`498` for the proposal that added formatted string literals, -and :meth:`str.format`, which uses a related format string mechanism. +.. seealso:: + + * :pep:`498` -- Literal String Interpolation + * :pep:`701` -- Syntactic formalization of f-strings + * :meth:`str.format`, which uses a related format string mechanism. .. _t-strings: @@ -1156,36 +1201,99 @@ t-strings A :dfn:`template string literal` or :dfn:`t-string` is a string literal that is prefixed with '``t``' or '``T``'. -These strings follow the same syntax and evaluation rules as -:ref:`formatted string literals <f-strings>`, with the following differences: +These strings follow the same syntax rules as +:ref:`formatted string literals <f-strings>`. +For differences in evaluation rules, see the +:ref:`Standard Library section on t-strings `. -* Rather than evaluating to a ``str`` object, template string literals evaluate - to a :class:`string.templatelib.Template` object. -* The :func:`format` protocol is not used. - Instead, the format specifier and conversions (if any) are passed to - a new :class:`~string.templatelib.Interpolation` object that is created - for each evaluated expression. - It is up to code that processes the resulting :class:`~string.templatelib.Template` - object to decide how to handle format specifiers and conversions.
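(Editor's aside: a speculative sketch of the t-string evaluation behavior that the deferred Standard Library section describes, assuming the :pep:`750` API of :class:`!string.templatelib.Template` with its ``strings`` and ``interpolations`` attributes.)::

    from string.templatelib import Interpolation, Template

    name = 'Galahad'
    template = t'Hello {name}!'

    assert isinstance(template, Template)
    assert template.strings == ('Hello ', '!')

    (interp,) = template.interpolations
    assert isinstance(interp, Interpolation)
    assert interp.value == 'Galahad'      # the evaluated result
    assert interp.expression == 'name'    # the source text of the expression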
+Formal grammar for f-strings +---------------------------- -* Format specifiers containing nested replacement fields are evaluated eagerly, - prior to being passed to the :class:`~string.templatelib.Interpolation` object. - For instance, an interpolation of the form ``{amount:.{precision}f}`` will - evaluate the inner expression ``{precision}`` to determine the value of the - ``format_spec`` attribute. - If ``precision`` were to be ``2``, the resulting format specifier - would be ``'.2f'``. +F-strings are handled partly by the :term:`lexical analyzer`, which produces the +tokens :py:data:`~token.FSTRING_START`, :py:data:`~token.FSTRING_MIDDLE` +and :py:data:`~token.FSTRING_END`, and partly by the parser, which handles +expressions in the replacement field. +The exact way the work is split is a CPython implementation detail. -* When the equals sign ``'='`` is provided in an interpolation expression, - the text of the expression is appended to the literal string that precedes - the relevant interpolation. - This includes the equals sign and any surrounding whitespace. - The :class:`!Interpolation` instance for the expression will be created as - normal, except that :attr:`~string.templatelib.Interpolation.conversion` will - be set to '``r``' (:func:`repr`) by default. - If an explicit conversion or format specifier are provided, - this will override the default behaviour. +Correspondingly, the f-string grammar is a mix of +:ref:`lexical and syntactic definitions `. + +Whitespace is significant in these situations: + +* There may be no whitespace in :py:data:`~token.FSTRING_START` (between + the prefix and quote). +* Whitespace in :py:data:`~token.FSTRING_MIDDLE` is part of the literal + string contents. +* In ``fstring_replacement_field``, if ``f_debug_specifier`` is present, + all whitespace after the opening brace until the ``f_debug_specifier``, + as well as whitespace immediately following ``f_debug_specifier``, + is retained as part of the expression. + + .. impl-detail:: + + The expression is not handled in the tokenization phase; it is + retrieved from the source code using locations of the ``{`` token + and the token after ``=``. + + +The ``FSTRING_MIDDLE`` definition uses +:ref:`negative lookaheads ` (``!``) +to indicate special characters (backslash, newline, ``{``, ``}``) and +sequences (``f_quote``). + +.. grammar-snippet:: + :group: python-grammar + + fstring: `FSTRING_START` `fstring_middle`* `FSTRING_END` + + FSTRING_START: `fstringprefix` ("'" | '"' | "'''" | '"""') + FSTRING_END: `f_quote` + fstringprefix: <("f" | "fr" | "rf"), case-insensitive> + f_debug_specifier: '=' + f_quote: <the quote or triple-quote from the enclosing FSTRING_START> + + fstring_middle: + | `fstring_replacement_field` + | `FSTRING_MIDDLE` + FSTRING_MIDDLE: + | (!"\" !`newline` !'{' !'}' !`f_quote`) `source_character` + | `stringescapeseq` + | "{{" + | "}}" + | <newline, in triple-quoted f-strings only> + fstring_replacement_field: + | '{' `f_expression` [`f_debug_specifier`] [`fstring_conversion`] + [`fstring_full_format_spec`] '}' + fstring_conversion: + | "!" ("s" | "r" | "a") + fstring_full_format_spec: + | ':' `fstring_format_spec`* + fstring_format_spec: + | `FSTRING_MIDDLE` + | `fstring_replacement_field` + f_expression: + | ','.(`conditional_expression` | "*" `or_expr`)+ [","] + | `yield_expression` + +.. note:: + + In the above grammar snippet, the ``f_quote`` and ``FSTRING_MIDDLE`` rules + are context-sensitive -- they depend on the contents of ``FSTRING_START`` + of the nearest enclosing ``fstring``.
+ + Constructing a more traditional formal grammar from this template is left + as an exercise for the reader. + +The grammar for t-strings is identical to the one for f-strings, with *t* +instead of *f* at the beginning of rule and token names and in the prefix. + +.. grammar-snippet:: + :group: python-grammar + + tstring: TSTRING_START tstring_middle* TSTRING_END + + .. _numbers: diff --git a/Doc/requirements.txt b/Doc/requirements.txt index d5f7b473c3a..716772b7f28 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -7,7 +7,7 @@ # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. # Keep this version in sync with ``Doc/conf.py``. -sphinx~=8.2.0 +sphinx~=9.0.0 blurb diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 04e8e5580fc..c41c70f0ed3 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -6,7 +6,6 @@ Doc/c-api/descriptor.rst Doc/c-api/float.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst -Doc/c-api/module.rst Doc/c-api/stable.rst Doc/c-api/typeobj.rst Doc/library/ast.rst @@ -29,14 +28,12 @@ Doc/library/pyexpat.rst Doc/library/select.rst Doc/library/socket.rst Doc/library/ssl.rst -Doc/library/stdtypes.rst Doc/library/termios.rst Doc/library/test.rst Doc/library/tkinter.rst Doc/library/tkinter.scrolledtext.rst Doc/library/tkinter.ttk.rst Doc/library/unittest.mock.rst -Doc/library/unittest.rst Doc/library/urllib.parse.rst Doc/library/urllib.request.rst Doc/library/wsgiref.rst diff --git a/Doc/tools/extensions/glossary_search.py b/Doc/tools/extensions/glossary_search.py index 502b6cd95bc..bcda4b7b435 100644 --- a/Doc/tools/extensions/glossary_search.py +++ b/Doc/tools/extensions/glossary_search.py @@ -38,7 +38,7 @@ def process_glossary_nodes( rendered = app.builder.render_partial(definition) terms[term.lower()] = { 'title': term, - 'body': rendered['html_body'], + 'body': rendered['fragment'], } diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index f5451adb37b..f9bf273e762 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -24,14 +24,6 @@ # Used in conf.py and updated here by python/release-tools/run_release.py SOURCE_URI = 'https://github.com/python/cpython/tree/main/%s' -# monkey-patch reST parser to disable alphabetic and roman enumerated lists -from docutils.parsers.rst.states import Body -Body.enum.converters['loweralpha'] = \ - Body.enum.converters['upperalpha'] = \ - Body.enum.converters['lowerroman'] = \ - Body.enum.converters['upperroman'] = lambda x: None - - class PyAwaitableMixin(object): def handle_signature(self, sig, signode): ret = super(PyAwaitableMixin, self).handle_signature(sig, signode) diff --git a/Doc/tools/templates/dummy.html b/Doc/tools/templates/dummy.html index 0fdbe2a5801..75f6607d8f3 100644 --- a/Doc/tools/templates/dummy.html +++ b/Doc/tools/templates/dummy.html @@ -27,8 +27,8 @@ In extensions/changes.py: -{% trans %}Deprecated since version {deprecated}, will be removed in version {removed}{% endtrans %} -{% trans %}Deprecated since version {deprecated}, removed in version {removed}{% endtrans %} +{% trans %}Deprecated since version %s, will be removed in version %s{% endtrans %} +{% trans %}Deprecated since version %s, removed in version %s{% endtrans %} In docsbuild-scripts, when rewriting indexsidebar.html with actual versions: diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst index fb491149793..deabac52530 100644 --- a/Doc/tutorial/introduction.rst 
+++ b/Doc/tutorial/introduction.rst @@ -49,7 +49,7 @@ primary prompt, ``>>>``. (It shouldn't take long.) Numbers ------- -The interpreter acts as a simple calculator: you can type an expression at it +The interpreter acts as a simple calculator: you can type an expression into it and it will write the value. Expression syntax is straightforward: the operators ``+``, ``-``, ``*`` and ``/`` can be used to perform arithmetic; parentheses (``()``) can be used for grouping. diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index cdadbe51417..dff0fe036ea 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -322,6 +322,30 @@ General Options .. versionadded:: 3.11 +.. option:: --with-missing-stdlib-config=FILE + + Path to a `JSON `_ configuration file + containing custom error messages for missing :term:`standard library` modules. + + This option is intended for Python distributors who wish to provide + distribution-specific guidance when users encounter standard library + modules that are missing or packaged separately. + + The JSON file should map missing module names to custom error message strings. + For example, if your distribution packages :mod:`tkinter` and + :mod:`_tkinter` separately and excludes :mod:`!_gdbm` for legal reasons, + the configuration could contain: + + .. code-block:: json + + { + "_gdbm": "The '_gdbm' module is not available in this distribution", + "tkinter": "Install the python-tk package to use tkinter", + "_tkinter": "Install the python-tk package to use tkinter" + } + + .. versionadded:: next + .. option:: --enable-pystats Turn on internal Python performance statistics gathering. diff --git a/Doc/whatsnew/2.3.rst b/Doc/whatsnew/2.3.rst index b7e4e73f4ce..f43692b3dce 100644 --- a/Doc/whatsnew/2.3.rst +++ b/Doc/whatsnew/2.3.rst @@ -66,7 +66,7 @@ Here's a simple example:: The union and intersection of sets can be computed with the :meth:`~frozenset.union` and :meth:`~frozenset.intersection` methods; an alternative notation uses the bitwise operators ``&`` and ``|``. Mutable sets also have in-place versions of these methods, -:meth:`!union_update` and :meth:`~frozenset.intersection_update`. :: +:meth:`!union_update` and :meth:`~set.intersection_update`. :: >>> S1 = sets.Set([1,2,3]) >>> S2 = sets.Set([4,5,6]) @@ -87,7 +87,7 @@ It's also possible to take the symmetric difference of two sets. This is the set of all elements in the union that aren't in the intersection. Another way of putting it is that the symmetric difference contains all elements that are in exactly one set. Again, there's an alternative notation (``^``), and an -in-place version with the ungainly name :meth:`~frozenset.symmetric_difference_update`. :: +in-place version with the ungainly name :meth:`~set.symmetric_difference_update`. :: >>> S1 = sets.Set([1,2,3,4]) >>> S2 = sets.Set([3,4,5,6]) diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst index 3430ac8668e..e195d9d462d 100644 --- a/Doc/whatsnew/2.5.rst +++ b/Doc/whatsnew/2.5.rst @@ -2169,9 +2169,9 @@ Changes to Python's build process and to the C API include: * Two new macros can be used to indicate C functions that are local to the current file so that a faster calling convention can be used. - ``Py_LOCAL(type)`` declares the function as returning a value of the + :c:macro:`Py_LOCAL` declares the function as returning a value of the specified *type* and uses a fast-calling qualifier.
- ``Py_LOCAL_INLINE(type)`` does the same thing and also requests the + :c:macro:`Py_LOCAL_INLINE` does the same thing and also requests the function be inlined. If macro :c:macro:`!PY_LOCAL_AGGRESSIVE` is defined before :file:`python.h` is included, a set of more aggressive optimizations are enabled for the module; you should benchmark the results to find out if these diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 24cc7e2d7eb..1bd82545e58 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -66,7 +66,7 @@ Summary -- Release highlights .. PEP-sized items next. * :pep:`799`: :ref:`A dedicated profiling package for organizing Python - profiling tools <whatsnew315-sampling-profiler>` + profiling tools <whatsnew315-profiling-package>` * :pep:`686`: :ref:`Python now uses UTF-8 as the default encoding ` * :pep:`782`: :ref:`A new PyBytesWriter C API to create a Python bytes object @@ -77,12 +77,32 @@ Summary -- Release highlights New features ============ +.. _whatsnew315-profiling-package: + +:pep:`799`: A dedicated profiling package +----------------------------------------- + +A new :mod:`!profiling` module has been added to organize Python's built-in +profiling tools under a single, coherent namespace. This module contains: + +* :mod:`!profiling.tracing`: deterministic function-call tracing (relocated from + :mod:`cProfile`). +* :mod:`!profiling.sampling`: a new statistical sampling profiler (named Tachyon). + +The :mod:`cProfile` module remains as an alias for backwards compatibility. +The :mod:`profile` module is deprecated and will be removed in Python 3.17. + +.. seealso:: :pep:`799` for further details. + +(Contributed by Pablo Galindo and László Kiss Kollár in :gh:`138122`.) + + .. _whatsnew315-sampling-profiler: -:pep:`799`: High frequency statistical sampling profiler --------------------------------------------------------- +Tachyon: High frequency statistical sampling profiler +----------------------------------------------------- -A new statistical sampling profiler has been added to the new :mod:`!profiling` module as +A new statistical sampling profiler (Tachyon) has been added as :mod:`!profiling.sampling`. This profiler enables low-overhead performance analysis of running Python processes without requiring code modification or process restart. @@ -91,101 +111,64 @@ every function call, the sampling profiler periodically captures stack traces fr running processes. This approach provides virtually zero overhead while achieving sampling rates of **up to 1,000,000 Hz**, making it the fastest sampling profiler available for Python (at the time of its contribution) and ideal for debugging -performance issues in production environments. +performance issues in production environments. This capability is particularly +valuable for debugging performance issues in production systems where traditional +profiling approaches would be too intrusive. Key features include: * **Zero-overhead profiling**: Attach to any running Python process without - affecting its performance -* **No code modification required**: Profile existing applications without restart -* **Real-time statistics**: Monitor sampling quality during data collection -* **Multiple output formats**: Generate both detailed statistics and flamegraph data -* **Thread-aware profiling**: Option to profile all threads or just the main thread + affecting its performance. Ideal for production debugging where you can't afford + to restart or slow down your application.
-Profile process 1234 for 10 seconds with default settings: +* **No code modification required**: Profile existing applications without restart. + Simply point the profiler at a running process by PID and start collecting data. -.. code-block:: shell +* **Flexible target modes**: - python -m profiling.sampling 1234 + * Profile running processes by PID (``attach``) - attach to already-running applications + * Run and profile scripts directly (``run``) - profile from the very start of execution + * Execute and profile modules (``run -m``) - profile packages run as ``python -m module`` -Profile with custom interval and duration, save to file: +* **Multiple profiling modes**: Choose what to measure based on your performance investigation: -.. code-block:: shell + * **Wall-clock time** (``--mode wall``, default): Measures real elapsed time including I/O, + network waits, and blocking operations. Use this to understand where your program spends + calendar time, including when waiting for external resources. + * **CPU time** (``--mode cpu``): Measures only active CPU execution time, excluding I/O waits + and blocking. Use this to identify CPU-bound bottlenecks and optimize computational work. + * **GIL-holding time** (``--mode gil``): Measures time spent holding Python's Global Interpreter + Lock. Use this to identify which threads dominate GIL usage in multi-threaded applications. - python -m profiling.sampling -i 50 -d 30 -o profile.stats 1234 +* **Thread-aware profiling**: Option to profile all threads (``-a``) or just the main thread, + essential for understanding multi-threaded application behavior. -Generate collapsed stacks for flamegraph: +* **Multiple output formats**: Choose the visualization that best fits your workflow: -.. code-block:: shell + * ``--pstats``: Detailed tabular statistics compatible with :mod:`pstats`. Shows function-level + timing with direct and cumulative samples. Best for detailed analysis and integration with + existing Python profiling tools. + * ``--collapsed``: Generates collapsed stack traces (one line per stack). This format is + specifically designed for creating flamegraphs with external tools like Brendan Gregg's + FlameGraph scripts or speedscope. + * ``--flamegraph``: Generates a self-contained interactive HTML flamegraph using D3.js. + Opens directly in your browser for immediate visual analysis. Flamegraphs show the call + hierarchy where width represents time spent, making it easy to spot bottlenecks at a glance. + * ``--gecko``: Generates Gecko Profiler format compatible with Firefox Profiler + (https://profiler.firefox.com). Upload the output to Firefox Profiler for advanced + timeline-based analysis with features like stack charts, markers, and network activity. + * ``--heatmap``: Generates an interactive HTML heatmap visualization with line-level sample + counts. Creates a directory with per-file heatmaps showing exactly where time is spent + at the source code level. - python -m profiling.sampling --collapsed 1234 +* **Live interactive mode**: Real-time TUI profiler with a top-like interface (``--live``). + Monitor performance as your application runs with interactive sorting and filtering. -Profile all threads and sort by total time: +* **Async-aware profiling**: Profile async/await code with task-based stack reconstruction + (``--async-aware``). See which coroutines are consuming time, with options to show only + running tasks or all tasks including those waiting. -.. 
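(Editor's aside: a hedged sketch of driving the documented command line from Python; the ``attach`` subcommand and flag spellings are inferred from the feature list above and may not match the final CLI exactly.)::

    import subprocess
    import sys

    pid = 1234  # hypothetical PID of the process to profile

    subprocess.run(
        [
            sys.executable, '-m', 'profiling.sampling',
            'attach',          # attach to a running process by PID
            '--mode', 'cpu',   # sample CPU time, excluding I/O waits
            '--flamegraph',    # emit a self-contained interactive HTML flamegraph
            str(pid),
        ],
        check=True,
    )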
code-block:: shell - - python -m profiling.sampling -a --sort-tottime 1234 - -The profiler generates statistical estimates of where time is spent: - -.. code-block:: text - - Real-time sampling stats: Mean: 100261.5Hz (9.97µs) Min: 86333.4Hz (11.58µs) Max: 118807.2Hz (8.42µs) Samples: 400001 - Captured 498841 samples in 5.00 seconds - Sample rate: 99768.04 samples/sec - Error rate: 0.72% - Profile Stats: - nsamples sample% tottime (s) cumul% cumtime (s) filename:lineno(function) - 43/418858 0.0 0.000 87.9 4.189 case.py:667(TestCase.run) - 3293/418812 0.7 0.033 87.9 4.188 case.py:613(TestCase._callTestMethod) - 158562/158562 33.3 1.586 33.3 1.586 test_compile.py:725(TestSpecifics.test_compiler_recursion_limit..check_limit) - 129553/129553 27.2 1.296 27.2 1.296 ast.py:46(parse) - 0/128129 0.0 0.000 26.9 1.281 test_ast.py:884(AST_Tests.test_ast_recursion_limit..check_limit) - 7/67446 0.0 0.000 14.2 0.674 test_compile.py:729(TestSpecifics.test_compiler_recursion_limit) - 6/60380 0.0 0.000 12.7 0.604 test_ast.py:888(AST_Tests.test_ast_recursion_limit) - 3/50020 0.0 0.000 10.5 0.500 test_compile.py:727(TestSpecifics.test_compiler_recursion_limit) - 1/38011 0.0 0.000 8.0 0.380 test_ast.py:886(AST_Tests.test_ast_recursion_limit) - 1/25076 0.0 0.000 5.3 0.251 test_compile.py:728(TestSpecifics.test_compiler_recursion_limit) - 22361/22362 4.7 0.224 4.7 0.224 test_compile.py:1368(TestSpecifics.test_big_dict_literal) - 4/18008 0.0 0.000 3.8 0.180 test_ast.py:889(AST_Tests.test_ast_recursion_limit) - 11/17696 0.0 0.000 3.7 0.177 subprocess.py:1038(Popen.__init__) - 16968/16968 3.6 0.170 3.6 0.170 subprocess.py:1900(Popen._execute_child) - 2/16941 0.0 0.000 3.6 0.169 test_compile.py:730(TestSpecifics.test_compiler_recursion_limit) - - Legend: - nsamples: Direct/Cumulative samples (direct executing / on call stack) - sample%: Percentage of total samples this function was directly executing - tottime: Estimated total time spent directly in this function - cumul%: Percentage of total samples when this function was on the call stack - cumtime: Estimated cumulative time (including time in called functions) - filename:lineno(function): Function location and name - - Summary of Interesting Functions: - - Functions with Highest Direct/Cumulative Ratio (Hot Spots): - 1.000 direct/cumulative ratio, 33.3% direct samples: test_compile.py:(TestSpecifics.test_compiler_recursion_limit..check_limit) - 1.000 direct/cumulative ratio, 27.2% direct samples: ast.py:(parse) - 1.000 direct/cumulative ratio, 3.6% direct samples: subprocess.py:(Popen._execute_child) - - Functions with Highest Call Frequency (Indirect Calls): - 418815 indirect calls, 87.9% total stack presence: case.py:(TestCase.run) - 415519 indirect calls, 87.9% total stack presence: case.py:(TestCase._callTestMethod) - 159470 indirect calls, 33.5% total stack presence: test_compile.py:(TestSpecifics.test_compiler_recursion_limit) - - Functions with Highest Call Magnification (Cumulative/Direct): - 12267.9x call magnification, 159470 indirect calls from 13 direct: test_compile.py:(TestSpecifics.test_compiler_recursion_limit) - 10581.7x call magnification, 116388 indirect calls from 11 direct: test_ast.py:(AST_Tests.test_ast_recursion_limit) - 9740.9x call magnification, 418815 indirect calls from 43 direct: case.py:(TestCase.run) - -The profiler automatically identifies performance bottlenecks through statistical -analysis, highlighting functions with high CPU usage and call frequency patterns. 
- -This capability is particularly valuable for debugging performance issues in -production systems where traditional profiling approaches would be too intrusive. - - .. seealso:: :pep:`799` for further details. - -(Contributed by Pablo Galindo and László Kiss Kollár in :gh:`135953`.) +(Contributed by Pablo Galindo and László Kiss Kollár in :gh:`135953` and :gh:`138122`.) .. _whatsnew315-improved-error-messages: @@ -394,6 +377,10 @@ Other language changes syntax warnings by module name. (Contributed by Serhiy Storchaka in :gh:`135801`.) +* Allowed defining the *__dict__* and *__weakref__* :ref:`__slots__ <slots>` + for any class. + (Contributed by Serhiy Storchaka in :gh:`41779`.) + New modules =========== @@ -412,6 +399,10 @@ Improved modules argparse -------- +* The :class:`~argparse.BooleanOptionalAction` action now supports single-dash + long options and alternate prefix characters. + (Contributed by Serhiy Storchaka in :gh:`138525`.) + * Changed the *suggest_on_error* parameter of :class:`argparse.ArgumentParser` to default to ``True``. This enables suggestions for mistyped arguments by default. (Contributed by Jakob Schluse in :gh:`140450`.) @@ -1084,19 +1075,23 @@ New features (Contributed by Victor Stinner in :gh:`129813`.) +* Add a new :c:func:`PyImport_CreateModuleFromInitfunc` C-API for creating + a module from a *spec* and *initfunc*. + (Contributed by Itamar Oren in :gh:`116146`.) + * Add :c:func:`PyTuple_FromArray` to create a :class:`tuple` from an array. (Contributed by Victor Stinner in :gh:`111489`.) +* Add :c:func:`PyUnstable_Object_Dump` to dump an object to ``stderr``. + It should only be used for debugging. + (Contributed by Victor Stinner in :gh:`141070`.) + * Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and :c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set the stack protection base address and stack protection size of a Python thread state. (Contributed by Victor Stinner in :gh:`139653`.) -* Add a new :c:func:`PyImport_CreateModuleFromInitfunc` C-API for creating - a module from a *spec* and *initfunc*. - (Contributed by Itamar Oren in :gh:`116146`.) - Changed C APIs -------------- @@ -1235,6 +1230,12 @@ Build changes set to ``no`` or with :option:`!--without-system-libmpdec`. (Contributed by Sergey B Kirpichev in :gh:`115119`.) +* The new configure option :option:`--with-missing-stdlib-config=FILE` allows + distributors to pass a `JSON `_ + configuration file containing custom error messages for :term:`standard library` + modules that are missing or packaged separately. + (Contributed by Stan Ulbrych and Petr Viktorin in :gh:`139707`.) + Porting to Python 3.15 ====================== @@ -1267,3 +1268,10 @@ that may require changes to your code. Use its :meth:`!close` method or the :func:`contextlib.closing` context manager to close it. (Contributed by Osama Abdelkader and Serhiy Storchaka in :gh:`140601`.) + +* If a short option and a single-dash long option are passed to + :meth:`argparse.ArgumentParser.add_argument`, *dest* is now inferred from + the single-dash long option. For example, in ``add_argument('-f', '-foo')``, + *dest* is now ``'foo'`` instead of ``'f'``. + Pass an explicit *dest* argument to preserve the old behavior. + (Contributed by Serhiy Storchaka in :gh:`138697`.) diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 59afd6520c4..9f4068116e3 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -2,7 +2,7 @@ What's New In Python 3.4 **************************** -:Author: R.
David Murray (Editor) +:Author: \R. David Murray (Editor) .. Rules for maintenance: diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index df9ec7050fc..5f2f7b6d4f5 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -39,16 +39,6 @@ Py_DEPRECATED(3.14) PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObje PyAPI_FUNC(PyObject *) PyDict_SetDefault( PyObject *mp, PyObject *key, PyObject *defaultobj); -// Inserts `key` with a value `default_value`, if `key` is not already present -// in the dictionary. If `result` is not NULL, then the value associated -// with `key` is returned in `*result` (either the existing value, or the now -// inserted `default_value`). -// Returns: -// -1 on error -// 0 if `key` was not present and `default_value` was inserted -// 1 if `key` was present and `default_value` was not inserted -PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); - /* Get the number of items of a dictionary. */ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { PyDictObject *mp; diff --git a/Include/cpython/object.h b/Include/cpython/object.h index d64298232e7..8693390aeda 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -295,7 +295,10 @@ PyAPI_FUNC(PyObject *) PyType_GetDict(PyTypeObject *); PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); PyAPI_FUNC(void) _Py_BreakPoint(void); -PyAPI_FUNC(void) _PyObject_Dump(PyObject *); +PyAPI_FUNC(void) PyUnstable_Object_Dump(PyObject *); + +// Alias for backward compatibility +#define _PyObject_Dump PyUnstable_Object_Dump PyAPI_FUNC(PyObject*) _PyObject_GetAttrId(PyObject *, _Py_Identifier *); @@ -387,10 +390,11 @@ PyAPI_FUNC(PyObject *) _PyObject_FunctionStr(PyObject *); process with a message on stderr if the given condition fails to hold, but compile away to nothing if NDEBUG is defined. - However, before aborting, Python will also try to call _PyObject_Dump() on - the given object. This may be of use when investigating bugs in which a - particular object is corrupt (e.g. buggy a tp_visit method in an extension - module breaking the garbage collector), to help locate the broken objects. + However, before aborting, Python will also try to call + PyUnstable_Object_Dump() on the given object. This may be of use when + investigating bugs in which a particular object is corrupt (e.g. a buggy + tp_visit method in an extension module breaking the garbage collector), to + help locate the broken objects. The WITH_MSG variant allows you to supply an additional message that Python will attempt to print to stderr, after the object dump.
*/ @@ -432,8 +436,6 @@ PyAPI_FUNC(void) _Py_NO_RETURN _PyObject_AssertFailed( PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate); -PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count); - /* For backwards compatibility with the old trashcan mechanism */ #define Py_TRASHCAN_BEGIN(op, dealloc) #define Py_TRASHCAN_END diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index 6b63d304b0d..be2e3b641c2 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -18,6 +18,7 @@ typedef struct { PyException_HEAD PyObject *msg; PyObject *excs; + PyObject *excs_str; } PyBaseExceptionGroupObject; typedef struct { diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 1e1e46ea4c0..22df26bd37a 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -135,6 +135,15 @@ struct _ts { /* Pointer to currently executing frame. */ struct _PyInterpreterFrame *current_frame; + /* Pointer to the base frame (bottommost sentinel frame). + Used by profilers to validate complete stack unwinding. + Points to the embedded base_frame in _PyThreadStateImpl. + The frame is embedded there rather than here because _PyInterpreterFrame + is defined in internal headers that cannot be exposed in the public API. */ + struct _PyInterpreterFrame *base_frame; + + struct _PyInterpreterFrame *last_profiled_frame; + Py_tracefunc c_profilefunc; Py_tracefunc c_tracefunc; PyObject *c_profileobj; diff --git a/Include/dictobject.h b/Include/dictobject.h index 1bbeec1ab69..0384e3131dc 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -68,6 +68,18 @@ PyAPI_FUNC(int) PyDict_GetItemRef(PyObject *mp, PyObject *key, PyObject **result PyAPI_FUNC(int) PyDict_GetItemStringRef(PyObject *mp, const char *key, PyObject **result); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030F0000 +// Inserts `key` with a value `default_value`, if `key` is not already present +// in the dictionary. If `result` is not NULL, then the value associated +// with `key` is returned in `*result` (either the existing value, or the now +// inserted `default_value`). +// Returns: +// -1 on error +// 0 if `key` was not present and `default_value` was inserted +// 1 if `key` was present and `default_value` was not inserted +PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); +#endif + #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *); #endif diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 71066f1bd9f..7f60eb49508 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -22,33 +22,48 @@ extern "C" { Another use is for the Tier 2 optimizer to decide when to create a new Tier 2 trace (executor). Again, exponential backoff is used. - The 16-bit counter is structured as a 12-bit unsigned 'value' - and a 4-bit 'backoff' field. When resetting the counter, the + The 16-bit counter is structured as a 13-bit unsigned 'value' + and a 3-bit 'backoff' field. When resetting the counter, the backoff field is incremented (until it reaches a limit) and the - value is set to a bit mask representing the value 2**backoff - 1. - The maximum backoff is 12 (the number of bits in the value). + value is set to some prime value - 1.
+ New values and backoffs for each backoff are calculated once + at compile time and saved to the value_and_backoff_next table. + The maximum backoff is 6, since 7 is reserved as UNREACHABLE_BACKOFF. There is an exceptional value which must not be updated, 0xFFFF. */ -#define BACKOFF_BITS 4 -#define MAX_BACKOFF 12 -#define UNREACHABLE_BACKOFF 15 +#define BACKOFF_BITS 3 +#define BACKOFF_MASK 7 +#define MAX_BACKOFF 6 +#define UNREACHABLE_BACKOFF 7 +#define MAX_VALUE 0x1FFF -static inline bool -is_unreachable_backoff_counter(_Py_BackoffCounter counter) -{ - return counter.value_and_backoff == UNREACHABLE_BACKOFF; -} +#define MAKE_VALUE_AND_BACKOFF(value, backoff) \ + ((value << BACKOFF_BITS) | backoff) + +// For previous backoff b we use value x such that +// x + 1 is close to 2**(2*b+1) and x + 1 is prime. +static const uint16_t value_and_backoff_next[] = { + MAKE_VALUE_AND_BACKOFF(1, 1), + MAKE_VALUE_AND_BACKOFF(6, 2), + MAKE_VALUE_AND_BACKOFF(30, 3), + MAKE_VALUE_AND_BACKOFF(126, 4), + MAKE_VALUE_AND_BACKOFF(508, 5), + MAKE_VALUE_AND_BACKOFF(2052, 6), + // We use the same backoff counter for all backoffs >= MAX_BACKOFF. + MAKE_VALUE_AND_BACKOFF(8190, 6), + MAKE_VALUE_AND_BACKOFF(8190, 6), +}; static inline _Py_BackoffCounter make_backoff_counter(uint16_t value, uint16_t backoff) { - assert(backoff <= 15); - assert(value <= 0xFFF); - _Py_BackoffCounter result; - result.value_and_backoff = (value << BACKOFF_BITS) | backoff; - return result; + assert(backoff <= UNREACHABLE_BACKOFF); + assert(value <= MAX_VALUE); + return ((_Py_BackoffCounter){ + .value_and_backoff = MAKE_VALUE_AND_BACKOFF(value, backoff) + }); } static inline _Py_BackoffCounter @@ -62,14 +77,11 @@ forge_backoff_counter(uint16_t counter) static inline _Py_BackoffCounter restart_backoff_counter(_Py_BackoffCounter counter) { - assert(!is_unreachable_backoff_counter(counter)); - int backoff = counter.value_and_backoff & 15; - if (backoff < MAX_BACKOFF) { - return make_backoff_counter((1 << (backoff + 1)) - 1, backoff + 1); - } - else { - return make_backoff_counter((1 << MAX_BACKOFF) - 1, MAX_BACKOFF); - } + uint16_t backoff = counter.value_and_backoff & BACKOFF_MASK; + assert(backoff <= MAX_BACKOFF); + return ((_Py_BackoffCounter){ + .value_and_backoff = value_and_backoff_next[backoff] + }); } static inline _Py_BackoffCounter @@ -113,7 +125,7 @@ trigger_backoff_counter(void) // as we always end up tracing the loop iteration's // exhaustion iteration. Which aborts our current tracer. #define JUMP_BACKWARD_INITIAL_VALUE 4000 -#define JUMP_BACKWARD_INITIAL_BACKOFF 12 +#define JUMP_BACKWARD_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_jump_backoff_counter(void) { @@ -126,7 +138,7 @@ initial_jump_backoff_counter(void) * otherwise when a side exit warms up we may construct * a new trace before the Tier 1 code has properly re-specialized.
*/ #define SIDE_EXIT_INITIAL_VALUE 4000 -#define SIDE_EXIT_INITIAL_BACKOFF 12 +#define SIDE_EXIT_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h index 506da06f708..4f4cf02f64b 100644 --- a/Include/internal/pycore_call.h +++ b/Include/internal/pycore_call.h @@ -64,39 +64,6 @@ PyAPI_FUNC(PyObject*) _PyObject_CallMethod( PyObject *name, const char *format, ...); -extern PyObject* _PyObject_CallMethodIdObjArgs( - PyObject *obj, - _Py_Identifier *name, - ...); - -static inline PyObject * -_PyObject_VectorcallMethodId( - _Py_Identifier *name, PyObject *const *args, - size_t nargsf, PyObject *kwnames) -{ - PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ - if (!oname) { - return _Py_NULL; - } - return PyObject_VectorcallMethod(oname, args, nargsf, kwnames); -} - -static inline PyObject * -_PyObject_CallMethodIdNoArgs(PyObject *self, _Py_Identifier *name) -{ - size_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET; - return _PyObject_VectorcallMethodId(name, &self, nargsf, _Py_NULL); -} - -static inline PyObject * -_PyObject_CallMethodIdOneArg(PyObject *self, _Py_Identifier *name, PyObject *arg) -{ - PyObject *args[2] = {self, arg}; - size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET; - assert(arg != NULL); - return _PyObject_VectorcallMethodId(name, args, nargsf, _Py_NULL); -} - /* === Vectorcall protocol (PEP 590) ============================= */ diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 47c42fccdc2..6bf33bddd5b 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -33,8 +33,6 @@ extern int _PyEval_SetOpcodeTrace(PyFrameObject *f, bool enable); // Export for 'array' shared extension PyAPI_FUNC(PyObject*) _PyEval_GetBuiltin(PyObject *); -extern PyObject* _PyEval_GetBuiltinId(_Py_Identifier *); - extern void _PyEval_SetSwitchInterval(unsigned long microseconds); extern unsigned long _PyEval_GetSwitchInterval(void); @@ -217,10 +215,13 @@ extern void _PyEval_DeactivateOpCache(void); static inline int _Py_MakeRecCheck(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + // Overflow if stack pointer is between soft limit and the base of the hardware stack. + // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing. + // We could have the wrong stack limits because of limited platform support, or user-space threads. 
#if _Py_STACK_GROWS_DOWN - return here_addr < _tstate->c_stack_soft_limit; + return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; #else - return here_addr > _tstate->c_stack_soft_limit; + return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; #endif } @@ -260,6 +261,11 @@ static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { #endif } +// Export for test_peg_generator +PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin( + PyThreadState *tstate, + int margin_count); + static inline void _Py_LeaveRecursiveCall(void) { } @@ -402,6 +408,64 @@ _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame PyAPI_DATA(const _Py_CODEUNIT *) _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR; +/* Helper functions for large uops */ + +PyAPI_FUNC(PyObject *) +_Py_VectorCall_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args, + _PyStackRef kwnames); + +PyAPI_FUNC(PyObject *) +_Py_BuiltinCallFast_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_BuiltinCallFastWithKeywords_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_PyCallMethodDescriptorFast_StackRefSteal( + _PyStackRef callable, + PyMethodDef *meth, + PyObject *self, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_PyCallMethodDescriptorFastWithKeywords_StackRefSteal( + _PyStackRef callable, + PyMethodDef *meth, + PyObject *self, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_CallBuiltinClass_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_BuildString_StackRefSteal( + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_BuildMap_StackRefSteal( + _PyStackRef *arguments, + int half_args); + +PyAPI_FUNC(void) +_Py_assert_within_stack_bounds( + _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, + const char *filename, int lineno); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 2601de40737..60b6fc4a72e 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -32,7 +32,7 @@ extern "C" { const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \ PyCriticalSection _cs; \ if (_should_lock_cs) { \ - _PyCriticalSection_Begin(&_cs, _orig_seq); \ + PyCriticalSection_Begin(&_cs, _orig_seq); \ } # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \ @@ -77,10 +77,10 @@ _PyCriticalSection_Resume(PyThreadState *tstate); // (private) slow path for locking the mutex PyAPI_FUNC(void) -_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m); +_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m); PyAPI_FUNC(void) -_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked); PyAPI_FUNC(void) @@ -95,34 +95,30 @@ _PyCriticalSection_IsActive(uintptr_t tag) } static inline void -_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginMutex(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m) { if (PyMutex_LockFast(m)) { - PyThreadState *tstate = 
_PyThreadState_GET(); c->_cs_mutex = m; c->_cs_prev = tstate->critical_section; tstate->critical_section = (uintptr_t)c; } else { - _PyCriticalSection_BeginSlow(c, m); + _PyCriticalSection_BeginSlow(tstate, c, m); } } -#define PyCriticalSection_BeginMutex _PyCriticalSection_BeginMutex static inline void -_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) +_PyCriticalSection_Begin(PyThreadState *tstate, PyCriticalSection *c, PyObject *op) { - _PyCriticalSection_BeginMutex(c, &op->ob_mutex); + _PyCriticalSection_BeginMutex(tstate, c, &op->ob_mutex); } -#define PyCriticalSection_Begin _PyCriticalSection_Begin // Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. static inline void -_PyCriticalSection_Pop(PyCriticalSection *c) +_PyCriticalSection_Pop(PyThreadState *tstate, PyCriticalSection *c) { - PyThreadState *tstate = _PyThreadState_GET(); uintptr_t prev = c->_cs_prev; tstate->critical_section = prev; @@ -132,7 +128,7 @@ _PyCriticalSection_Pop(PyCriticalSection *c) } static inline void -_PyCriticalSection_End(PyCriticalSection *c) +_PyCriticalSection_End(PyThreadState *tstate, PyCriticalSection *c) { // If the mutex is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for locks already held in the top-most @@ -141,18 +137,17 @@ _PyCriticalSection_End(PyCriticalSection *c) return; } PyMutex_Unlock(c->_cs_mutex); - _PyCriticalSection_Pop(c); + _PyCriticalSection_Pop(tstate, c); } -#define PyCriticalSection_End _PyCriticalSection_End static inline void -_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +_PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { if (m1 == m2) { // If the two mutex arguments are the same, treat this as a critical // section with a single mutex. 
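+        // (PyMutex is not re-entrant: trying to acquire the same mutex a
+        // second time from this thread would deadlock, hence the
+        // single-mutex path.)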
c->_cs_mutex2 = NULL; - _PyCriticalSection_BeginMutex(&c->_cs_base, m1); + _PyCriticalSection_BeginMutex(tstate, &c->_cs_base, m1); return; } @@ -167,7 +162,6 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if (PyMutex_LockFast(m1)) { if (PyMutex_LockFast(m2)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_base._cs_mutex = m1; c->_cs_mutex2 = m2; c->_cs_base._cs_prev = tstate->critical_section; @@ -176,24 +170,22 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) tstate->critical_section = p; } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 1); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 1); } } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 0); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 0); } } -#define PyCriticalSection2_BeginMutex _PyCriticalSection2_BeginMutex static inline void -_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) +_PyCriticalSection2_Begin(PyThreadState *tstate, PyCriticalSection2 *c, PyObject *a, PyObject *b) { - _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex); + _PyCriticalSection2_BeginMutex(tstate, c, &a->ob_mutex, &b->ob_mutex); } -#define PyCriticalSection2_Begin _PyCriticalSection2_Begin static inline void -_PyCriticalSection2_End(PyCriticalSection2 *c) +_PyCriticalSection2_End(PyThreadState *tstate, PyCriticalSection2 *c) { // if mutex1 is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for mutexes that are already held, @@ -207,9 +199,8 @@ _PyCriticalSection2_End(PyCriticalSection2 *c) PyMutex_Unlock(c->_cs_mutex2); } PyMutex_Unlock(c->_cs_base._cs_mutex); - _PyCriticalSection_Pop(&c->_cs_base); + _PyCriticalSection_Pop(tstate, &c->_cs_base); } -#define PyCriticalSection2_End _PyCriticalSection2_End static inline void _PyCriticalSection_AssertHeld(PyMutex *mutex) @@ -251,6 +242,45 @@ _PyCriticalSection_AssertHeldObj(PyObject *op) #endif } + +#undef Py_BEGIN_CRITICAL_SECTION +# define Py_BEGIN_CRITICAL_SECTION(op) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_Begin(_cs_tstate, &_py_cs, _PyObject_CAST(op)) + +#undef Py_BEGIN_CRITICAL_SECTION_MUTEX +# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_BeginMutex(_cs_tstate, &_py_cs, mutex) + +#undef Py_END_CRITICAL_SECTION +# define Py_END_CRITICAL_SECTION() \ + _PyCriticalSection_End(_cs_tstate, &_py_cs); \ + } + +#undef Py_BEGIN_CRITICAL_SECTION2 +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_Begin(_cs_tstate, &_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b)) + +#undef Py_BEGIN_CRITICAL_SECTION2_MUTEX +# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_BeginMutex(_cs_tstate, &_py_cs2, m1, m2) + +#undef Py_END_CRITICAL_SECTION2 +# define Py_END_CRITICAL_SECTION2() \ + _PyCriticalSection2_End(_cs_tstate, &_py_cs2); \ + } + #endif /* Py_GIL_DISABLED */ #ifdef __cplusplus diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 0f17bf17f82..1cdc4449b17 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -102,6 +102,8 @@ typedef struct _Py_DebugOffsets { uint64_t next; uint64_t 
interp; uint64_t current_frame; + uint64_t base_frame; + uint64_t last_profiled_frame; uint64_t thread_id; uint64_t native_thread_id; uint64_t datastack_chunk; @@ -272,6 +274,8 @@ typedef struct _Py_DebugOffsets { .next = offsetof(PyThreadState, next), \ .interp = offsetof(PyThreadState, interp), \ .current_frame = offsetof(PyThreadState, current_frame), \ + .base_frame = offsetof(PyThreadState, base_frame), \ + .last_profiled_frame = offsetof(PyThreadState, last_profiled_frame), \ .thread_id = offsetof(PyThreadState, thread_id), \ .native_thread_id = offsetof(PyThreadState, native_thread_id), \ .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index b8fe360321d..1193f496da1 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -36,13 +36,6 @@ extern int _PyDict_DelItem_KnownHash_LockHeld(PyObject *mp, PyObject *key, extern int _PyDict_Contains_KnownHash(PyObject *, PyObject *, Py_hash_t); -// "Id" variants -extern PyObject* _PyDict_GetItemIdWithError(PyObject *dp, - _Py_Identifier *key); -extern int _PyDict_ContainsId(PyObject *, _Py_Identifier *); -extern int _PyDict_SetItemId(PyObject *dp, _Py_Identifier *key, PyObject *item); -extern int _PyDict_DelItemId(PyObject *mp, _Py_Identifier *key); - extern int _PyDict_Next( PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash); diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index ecef4364cc3..d23d6d4f91b 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -13,7 +13,7 @@ static inline void _PyStaticObject_CheckRefcnt(PyObject *obj) { if (!_Py_IsImmortal(obj)) { fprintf(stderr, "Immortal Object has less refcnt than expected.\n"); - _PyObject_Dump(obj); + PyUnstable_Object_Dump(obj); } } #endif @@ -1609,6 +1609,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_parameter_type)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_return)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_stack)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cache_frames)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_datetime_module)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_statements)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cadata)); @@ -2053,6 +2054,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stacklevel)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(start)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(statement)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stats)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(status)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stderr)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stdin)); @@ -2070,6 +2072,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(take_bytes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target_is_directory)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(task)); diff --git 
a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 4dd73291df4..5c3ea474ad0 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -332,6 +332,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(c_parameter_type) STRUCT_FOR_ID(c_return) STRUCT_FOR_ID(c_stack) + STRUCT_FOR_ID(cache_frames) STRUCT_FOR_ID(cached_datetime_module) STRUCT_FOR_ID(cached_statements) STRUCT_FOR_ID(cadata) @@ -776,6 +777,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(stacklevel) STRUCT_FOR_ID(start) STRUCT_FOR_ID(statement) + STRUCT_FOR_ID(stats) STRUCT_FOR_ID(status) STRUCT_FOR_ID(stderr) STRUCT_FOR_ID(stdin) @@ -793,6 +795,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) STRUCT_FOR_ID(tag) + STRUCT_FOR_ID(take_bytes) STRUCT_FOR_ID(target) STRUCT_FOR_ID(target_is_directory) STRUCT_FOR_ID(task) diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index d64a18bb09e..4c8b8c0ed86 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -128,11 +128,18 @@ PyAPI_FUNC(int) _PyImport_ClearExtension(PyObject *name, PyObject *filename); // state of the module argument: // - If module is NULL or a PyModuleObject with md_gil == Py_MOD_GIL_NOT_USED, // call _PyEval_DisableGIL(). -// - Otherwise, call _PyEval_EnableGILPermanent(). If the GIL was not already -// enabled permanently, issue a warning referencing the module's name. +// - Otherwise, call _PyImport_EnableGILAndWarn // // This function may raise an exception. extern int _PyImport_CheckGILForModule(PyObject *module, PyObject *module_name); +// Assuming that the GIL is enabled from a call to +// _PyEval_EnableGILTransient(), call _PyEval_EnableGILPermanent(). +// If the GIL was not already enabled permanently, issue a warning referencing +// the module's name. +// Leave a message in verbose mode. +// +// This function may raise an exception. +extern int _PyImport_EnableGILAndWarn(PyThreadState *, PyObject *module_name); #endif #ifdef __cplusplus diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 368dafb9063..183b2d45c5e 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -153,10 +153,8 @@ typedef enum { } _PyConfigInitEnum; typedef enum { - /* For now, this means the GIL is enabled. - - gh-116329: This will eventually change to "the GIL is disabled but can - be re-enabled by loading an incompatible extension module." */ + /* In free threaded builds, this means that the GIL is disabled at startup, + but may be enabled by loading an incompatible extension module. */ _PyConfig_GIL_DEFAULT = -1, /* The GIL has been forced off or on, and will not be affected by module loading. 
*/ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f861d3abd96..6b3d5711b92 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -179,6 +179,10 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; + // Duration of the collection in seconds: + double duration; }; /* Running stats per generation */ @@ -189,6 +193,10 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; + // Duration of the collection in seconds: + double duration; }; enum _GCPhase { diff --git a/Include/internal/pycore_jit.h b/Include/internal/pycore_jit.h index 8a88cbf607b..b1550a6ddcf 100644 --- a/Include/internal/pycore_jit.h +++ b/Include/internal/pycore_jit.h @@ -13,6 +13,9 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +/* To be able to reason about code layout and branches, keep code size below 1 MB */ +#define PY_MAX_JIT_CODE_SIZE ((1 << 20)-1) + #ifdef _Py_JIT typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate); diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index c31c3365700..2ae0185226f 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -77,6 +77,10 @@ extern "C" { _Py_atomic_store_ushort_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_LOAD_INT(value) \ + _Py_atomic_load_int(&value) +#define FT_ATOMIC_STORE_INT(value, new_value) \ + _Py_atomic_store_int(&value, new_value) #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ _Py_atomic_store_int_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_INT_RELAXED(value) \ @@ -144,6 +148,8 @@ extern "C" { #define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value #define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT(value) value +#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value #define FT_ATOMIC_LOAD_INT_RELAXED(value) value #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index ed943b51056..05484e847f1 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -70,6 +70,27 @@ static inline int _PyMem_IsPtrFreed(const void *ptr) #endif } +// Similar to _PyMem_IsPtrFreed() but expects an 'unsigned long' instead of a +// pointer. 
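+// The patterns below mirror the fill bytes used by debug allocators:
+// 0xCD marks freshly allocated ("clean") memory, 0xDD freed ("dead")
+// memory, and 0xFD forbidden padding bytes, both in CPython's debug
+// hooks and in the MSVC debug heap; zero and all-ones are other common
+// junk values.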
+static inline int _PyMem_IsULongFreed(unsigned long value) +{ +#if SIZEOF_LONG == 8 + return (value == 0 + || value == (unsigned long)0xCDCDCDCDCDCDCDCD + || value == (unsigned long)0xDDDDDDDDDDDDDDDD + || value == (unsigned long)0xFDFDFDFDFDFDFDFD + || value == (unsigned long)0xFFFFFFFFFFFFFFFF); +#elif SIZEOF_LONG == 4 + return (value == 0 + || value == (unsigned long)0xCDCDCDCD + || value == (unsigned long)0xDDDDDDDD + || value == (unsigned long)0xFDFDFDFD + || value == (unsigned long)0xFFFFFFFF); +#else +# error "unknown long size" +#endif +} + extern int _PyMem_GetAllocatorName( const char *name, PyMemAllocatorName *allocator); diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 08f8d0e59d1..31d88339a13 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1607,6 +1607,7 @@ extern "C" { INIT_ID(c_parameter_type), \ INIT_ID(c_return), \ INIT_ID(c_stack), \ + INIT_ID(cache_frames), \ INIT_ID(cached_datetime_module), \ INIT_ID(cached_statements), \ INIT_ID(cadata), \ @@ -2051,6 +2052,7 @@ extern "C" { INIT_ID(stacklevel), \ INIT_ID(start), \ INIT_ID(statement), \ + INIT_ID(stats), \ INIT_ID(status), \ INIT_ID(stderr), \ INIT_ID(stdin), \ @@ -2068,6 +2070,7 @@ extern "C" { INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ INIT_ID(tag), \ + INIT_ID(take_bytes), \ INIT_ID(target), \ INIT_ID(target_is_directory), \ INIT_ID(task), \ diff --git a/Include/internal/pycore_tracemalloc.h b/Include/internal/pycore_tracemalloc.h index 572e8025876..693385f9a46 100644 --- a/Include/internal/pycore_tracemalloc.h +++ b/Include/internal/pycore_tracemalloc.h @@ -30,8 +30,8 @@ struct _PyTraceMalloc_Config { }; -/* Pack the frame_t structure to reduce the memory footprint on 64-bit - architectures: 12 bytes instead of 16. */ +/* Pack the tracemalloc_frame and tracemalloc_traceback structures to reduce + the memory footprint on 64-bit architectures: 12 bytes instead of 16. */ #if defined(_MSC_VER) #pragma pack(push, 4) #endif @@ -46,18 +46,22 @@ tracemalloc_frame { PyObject *filename; unsigned int lineno; }; -#ifdef _MSC_VER -#pragma pack(pop) -#endif -struct tracemalloc_traceback { +struct +#ifdef __GNUC__ +__attribute__((packed)) +#endif +tracemalloc_traceback { Py_uhash_t hash; /* Number of frames stored */ uint16_t nframe; /* Total number of frames the traceback had */ uint16_t total_nframe; - struct tracemalloc_frame frames[1]; + struct tracemalloc_frame frames[]; }; +#ifdef _MSC_VER +#pragma pack(pop) +#endif struct _tracemalloc_runtime_state { @@ -95,7 +99,7 @@ struct _tracemalloc_runtime_state { Protected by TABLES_LOCK(). */ _Py_hashtable_t *domains; - struct tracemalloc_traceback empty_traceback; + struct tracemalloc_traceback *empty_traceback; Py_tss_t reentrant_key; }; diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index 50048801b2e..c4f723ac8ab 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -10,6 +10,7 @@ extern "C" { #include "pycore_brc.h" // struct _brc_thread_state #include "pycore_freelist_state.h" // struct _Py_freelists +#include "pycore_interpframe_structs.h" // _PyInterpreterFrame #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr #include "pycore_uop.h" // struct _PyUOpInstruction @@ -61,6 +62,10 @@ typedef struct _PyThreadStateImpl { // semi-public fields are in PyThreadState. 
PyThreadState base; + // Embedded base frame - sentinel at the bottom of the frame stack. + // Used by profiling/sampling to detect incomplete stack traces. + _PyInterpreterFrame base_frame; + // The reference count field is used to synchronize deallocation of the // thread state during runtime finalization. Py_ssize_t refcount; diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index e7ca65a56b6..97dda73f9b5 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -307,14 +307,6 @@ PyAPI_FUNC(PyObject*) _PyUnicode_JoinArray( Py_ssize_t seqlen ); -/* Test whether a unicode is equal to ASCII identifier. Return 1 if true, - 0 otherwise. The right argument must be ASCII identifier. - Any error occurs inside will be cleared before return. */ -extern int _PyUnicode_EqualToASCIIId( - PyObject *left, /* Left string */ - _Py_Identifier *right /* Right identifier */ - ); - // Test whether a unicode is equal to ASCII string. Return 1 if true, // 0 otherwise. The right argument must be ASCII-encoded string. // Any error occurs inside will be cleared before return. diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b1e57126b92..c5b01ff9876 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1108,6 +1108,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(cache_frames); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(cached_datetime_module); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2884,6 +2888,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(stats); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(status); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2952,6 +2960,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(take_bytes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(target); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h index 4e1b15af42c..70576046385 100644 --- a/Include/internal/pycore_uop.h +++ b/Include/internal/pycore_uop.h @@ -36,7 +36,12 @@ typedef struct _PyUOpInstruction{ } _PyUOpInstruction; // This is the length of the trace we translate initially. 
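+// (A guess at the rationale for the smaller debug limit below: each compiled
+// trace must stay under PY_MAX_JIT_CODE_SIZE from pycore_jit.h, so larger
+// per-uop stencils leave room for fewer uops per trace.)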
+#ifdef Py_DEBUG + // With asserts, the stencils are a lot larger +#define UOP_MAX_TRACE_LENGTH 1000 +#else #define UOP_MAX_TRACE_LENGTH 3000 +#endif #define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)) /* Bloom filter with m = 256 diff --git a/Include/modsupport.h b/Include/modsupport.h index 094b9ff0e5c..cb47ad8cd27 100644 --- a/Include/modsupport.h +++ b/Include/modsupport.h @@ -132,7 +132,7 @@ PyAPI_FUNC(int) PyABIInfo_Check(PyABIInfo *info, const char *module_name); ) \ ///////////////////////////////////////////////////////// -#define _PyABIInfo_DEFAULT() { \ +#define _PyABIInfo_DEFAULT { \ 1, 0, \ PyABIInfo_DEFAULT_FLAGS, \ PY_VERSION_HEX, \ diff --git a/Include/object.h b/Include/object.h index f17dcba4f47..ad452be8405 100644 --- a/Include/object.h +++ b/Include/object.h @@ -140,12 +140,12 @@ struct _object { # endif }; #else - Py_ssize_t ob_refcnt; + Py_ssize_t ob_refcnt; // part of stable ABI; do not change #endif _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner; }; - PyTypeObject *ob_type; + PyTypeObject *ob_type; // part of stable ABI; do not change }; #else // Objects that are not owned by any thread use a thread id (tid) of zero. @@ -173,7 +173,7 @@ struct _object { #ifndef _Py_OPAQUE_PYOBJECT struct PyVarObject { PyObject ob_base; - Py_ssize_t ob_size; /* Number of items in variable part */ + Py_ssize_t ob_size; // Number of items in variable part. Part of stable ABI }; #endif typedef struct PyVarObject PyVarObject; @@ -265,56 +265,72 @@ _Py_IsOwnedByCurrentThread(PyObject *ob) } #endif -// Py_TYPE() implementation for the stable ABI -PyAPI_FUNC(PyTypeObject*) Py_TYPE(PyObject *ob); - -#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030e0000 - // Stable ABI implements Py_TYPE() as a function call - // on limited C API version 3.14 and newer. 
-#else - static inline PyTypeObject* _Py_TYPE(PyObject *ob) - { - return ob->ob_type; - } - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 - # define Py_TYPE(ob) _Py_TYPE(_PyObject_CAST(ob)) - #else - # define Py_TYPE(ob) _Py_TYPE(ob) - #endif -#endif - PyAPI_DATA(PyTypeObject) PyLong_Type; PyAPI_DATA(PyTypeObject) PyBool_Type; +/* Definitions for the stable ABI */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 14) +PyAPI_FUNC(PyTypeObject*) Py_TYPE(PyObject *ob); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 15) +PyAPI_FUNC(Py_ssize_t) Py_SIZE(PyObject *ob); +PyAPI_FUNC(int) Py_IS_TYPE(PyObject *ob, PyTypeObject *type); +PyAPI_FUNC(void) Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size); +#endif + #ifndef _Py_OPAQUE_PYOBJECT + +static inline void +Py_SET_TYPE(PyObject *ob, PyTypeObject *type) +{ + ob->ob_type = type; +} + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 11) +// Non-limited API & limited API 3.11 & below: use static inline functions and +// use _PyObject_CAST so that users don't need their own casts +# define Py_TYPE(ob) _Py_TYPE_impl(_PyObject_CAST(ob)) +# define Py_SIZE(ob) _Py_SIZE_impl(_PyObject_CAST(ob)) +# define Py_IS_TYPE(ob, type) _Py_IS_TYPE_impl(_PyObject_CAST(ob), (type)) +# define Py_SET_SIZE(ob, size) _Py_SET_SIZE_impl(_PyVarObject_CAST(ob), (size)) +# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) +#elif Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 15) +// Limited API 3.11-3.14: use static inline functions, without casts +# define Py_SIZE(ob) _Py_SIZE_impl(ob) +# define Py_IS_TYPE(ob, type) _Py_IS_TYPE_impl((ob), (type)) +# define Py_SET_SIZE(ob, size) _Py_SET_SIZE_impl((ob), (size)) +# if Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 14) +// Py_TYPE() is static inline only on Limited API 3.13 and below +# define Py_TYPE(ob) _Py_TYPE_impl(ob) +# endif +#else +// Limited API 3.15+: use function calls +#endif + +static inline +PyTypeObject* _Py_TYPE_impl(PyObject *ob) +{ + return ob->ob_type; +} + // bpo-39573: The Py_SET_SIZE() function must be used to set an object size. 
-static inline Py_ssize_t Py_SIZE(PyObject *ob) {
+static inline Py_ssize_t
+_Py_SIZE_impl(PyObject *ob)
+{
    assert(Py_TYPE(ob) != &PyLong_Type);
    assert(Py_TYPE(ob) != &PyBool_Type);
    return _PyVarObject_CAST(ob)->ob_size;
}
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
-# define Py_SIZE(ob) Py_SIZE(_PyObject_CAST(ob))
-#endif
-#endif // !defined(_Py_OPAQUE_PYOBJECT)
-static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) {
+static inline int
+_Py_IS_TYPE_impl(PyObject *ob, PyTypeObject *type)
+{
    return Py_TYPE(ob) == type;
}
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
-# define Py_IS_TYPE(ob, type) Py_IS_TYPE(_PyObject_CAST(ob), (type))
-#endif
-
-#ifndef _Py_OPAQUE_PYOBJECT
-static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) {
-    ob->ob_type = type;
-}
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
-# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type)
-#endif
-
-static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) {
+static inline void
+_Py_SET_SIZE_impl(PyVarObject *ob, Py_ssize_t size)
+{
    assert(Py_TYPE(_PyObject_CAST(ob)) != &PyLong_Type);
    assert(Py_TYPE(_PyObject_CAST(ob)) != &PyBool_Type);
#ifdef Py_GIL_DISABLED
@@ -323,9 +339,7 @@ static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) {
    ob->ob_size = size;
#endif
}
-#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
-# define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size))
-#endif
+
#endif // !defined(_Py_OPAQUE_PYOBJECT)
diff --git a/Include/patchlevel.h b/Include/patchlevel.h
index 899c892631f..804aa1a0427 100644
--- a/Include/patchlevel.h
+++ b/Include/patchlevel.h
@@ -27,7 +27,7 @@
#define PY_RELEASE_SERIAL 2
/* Version as a string */
-#define PY_VERSION "3.15.0a2"
+#define PY_VERSION "3.15.0a2+"
/*--end constants--*/
diff --git a/Include/pymacro.h b/Include/pymacro.h
index 857cdf12db9..7ecce44a0d2 100644
--- a/Include/pymacro.h
+++ b/Include/pymacro.h
@@ -116,6 +116,12 @@
/* Absolute value of the number x */
#define Py_ABS(x) ((x) < 0 ? -(x) : (x))
+/* Safer implementation that avoids undefined behavior for the minimum
+   value of a signed integer type: in the common two's-complement
+   representation, its absolute value is larger than the maximum value
+   of the type, so Py_ABS() would overflow.
+ */
+#define _Py_ABS_CAST(T, x) ((x) >= 0 ? ((T) (x)) : ((T) (((T) -((x) + 1)) + 1u)))
#define _Py_XSTRINGIFY(x) #x
diff --git a/Include/pyport.h b/Include/pyport.h
index b250f9e308f..61e2317976e 100644
--- a/Include/pyport.h
+++ b/Include/pyport.h
@@ -504,13 +504,20 @@ extern "C" {
* Thread support is stubbed and any attempt to create a new thread fails.
*/
#if (!defined(HAVE_PTHREAD_STUBS) && \
+    !defined(__wasi__) && \
    (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__)))
# define Py_CAN_START_THREADS 1
#endif
-#ifdef WITH_THREAD
-// HAVE_THREAD_LOCAL is just defined here for compatibility's sake
+
+/* gh-142163: Some libraries rely on HAVE_THREAD_LOCAL being undefined, so
+ * we can define it only when Py_BUILD_CORE is set. */
+#ifdef Py_BUILD_CORE
+// This is no longer coupled to _Py_thread_local.
# define HAVE_THREAD_LOCAL 1
+#endif
+
+#ifdef WITH_THREAD
# ifdef thread_local
# define _Py_thread_local thread_local
# elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md
index 804d7436018..60ab2055afa 100644
--- a/InternalDocs/frames.md
+++ b/InternalDocs/frames.md
@@ -111,6 +111,55 @@ ### Shim frames
instruction which cleans up the shim frame and returns.
+
+### Base frame
+
+Each thread state contains an embedded `_PyInterpreterFrame` called the "base frame"
+that serves as a sentinel at the bottom of the frame stack. This frame is allocated
+in `_PyThreadStateImpl` (the internal extension of `PyThreadState`) and initialized
+when the thread state is created. The `owner` field is set to `FRAME_OWNED_BY_INTERPRETER`.
+
+External profilers and sampling tools can validate that they have successfully unwound
+the complete call stack by checking that the frame chain terminates at the base frame.
+The `PyThreadState.base_frame` pointer provides the expected address to compare against.
+If a stack walk doesn't reach this frame, the sample is incomplete (possibly due to a
+race condition) and should be discarded.
+
+The base frame is embedded in `_PyThreadStateImpl` rather than `PyThreadState` because
+`_PyInterpreterFrame` is defined in internal headers that cannot be exposed in the
+public API. A pointer (`PyThreadState.base_frame`) is provided for profilers to access
+the address without needing internal headers.
+
+See the initialization in `new_threadstate()` in [Python/pystate.c](../Python/pystate.c).
+
+#### How profilers should use the base frame
+
+External profilers should read `tstate->base_frame` before walking the stack, then
+walk from `tstate->current_frame` following `frame->previous` pointers until reaching
+a frame with `owner == FRAME_OWNED_BY_INTERPRETER`. After the walk, verify that the
+last frame address matches `base_frame`. If not, discard the sample as incomplete,
+since the frame chain may have been in an inconsistent state due to concurrent updates.
+
+
+### Remote Profiling Frame Cache
+
+The `last_profiled_frame` field in `PyThreadState` supports an optimization for
+remote profilers that sample call stacks from external processes. When a remote
+profiler reads the call stack, it writes the current frame address to this field.
+The eval loop then keeps this pointer valid by updating it to the parent frame
+whenever a frame returns (in `_PyEval_FrameClearAndPop`).
+
+This creates a "high-water mark" that always points to a frame still on the stack.
+On subsequent samples, the profiler can walk from `current_frame` until it reaches
+`last_profiled_frame`, knowing that frames from that point downward are unchanged
+and can be retrieved from a cache. This significantly reduces the number of remote
+memory reads needed when call stacks are deep and stable at their base.
+
+The update in `_PyEval_FrameClearAndPop` is guarded: it only writes when
+`last_profiled_frame` is non-NULL and matches the frame being popped. This
+prevents transient frames (called and returned between profiler samples) from
+corrupting the cache pointer, while avoiding any overhead when profiling is inactive.
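+
+#### Sketch of a cache-aware remote walk
+
+To make the two mechanisms concrete, here is a minimal sketch of a remote walk
+that validates against the base frame and stops at the cache boundary. It is
+not taken from any real profiler: `read_word` stands in for whatever
+remote-memory primitive the tool already has (such as `process_vm_readv` on
+Linux), the `off_*` values are field offsets obtained from the
+`_Py_DebugOffsets` table, and every other name is hypothetical. The write-back
+of the newly sampled top frame into `last_profiled_frame` is omitted.
+
+```c
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Reads one pointer-sized word from the target process into *out. */
+typedef bool (*read_word_fn)(void *ctx, uintptr_t addr, uintptr_t *out);
+
+static bool
+walk_remote_frames(void *ctx, read_word_fn read_word, uintptr_t tstate,
+                   size_t off_current_frame, size_t off_base_frame,
+                   size_t off_last_profiled, size_t off_previous,
+                   uintptr_t out[], size_t max_frames, size_t *n_frames)
+{
+    uintptr_t frame, base, last;
+    if (!read_word(ctx, tstate + off_current_frame, &frame) ||
+        !read_word(ctx, tstate + off_base_frame, &base) ||
+        !read_word(ctx, tstate + off_last_profiled, &last)) {
+        return false;
+    }
+    *n_frames = 0;
+    while (frame != 0 && *n_frames < max_frames) {
+        if (frame == base) {
+            return true;   /* reached the sentinel: the sample is complete */
+        }
+        if (last != 0 && frame == last) {
+            return true;   /* frames below this point come from the cache */
+        }
+        out[(*n_frames)++] = frame;
+        if (!read_word(ctx, frame + off_previous, &frame)) {
+            return false;  /* read failed: the target likely changed under us */
+        }
+    }
+    return false;          /* never reached base_frame: discard the sample */
+}
+```
+
+A walk that returns `false` should simply be discarded; the next sample will
+usually succeed, because an inconsistent chain only lasts for the duration of
+a frame push or pop.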
+ + ### The Instruction Pointer `_PyInterpreterFrame` has two fields which are used to maintain the instruction diff --git a/InternalDocs/stack_protection.md b/InternalDocs/stack_protection.md index fa025bd930f..14802e57d09 100644 --- a/InternalDocs/stack_protection.md +++ b/InternalDocs/stack_protection.md @@ -38,12 +38,19 @@ # Stack Protection ```python kb_used = (stack_top - stack_pointer)>>10 -if stack_pointer < hard_limit: +if stack_pointer < bottom_of_machine_stack: + pass # Our stack limits could be wrong so it is safest to do nothing. +elif stack_pointer < hard_limit: FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)") elif stack_pointer < soft_limit: raise RecursionError(f"Stack overflow (used {kb_used} kB)") ``` +### User space threads and other oddities + +Some libraries provide user-space threads. These will change the C stack at runtime. +To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit. + ### Diagnosing and fixing stack overflows For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`. diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 57b712bc068..29d7cc67b6e 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -1,4 +1,3 @@ -import io import os import sys @@ -169,6 +168,8 @@ class Argparse(ThemeSection): short_option: str = ANSIColors.BOLD_GREEN label: str = ANSIColors.BOLD_YELLOW action: str = ANSIColors.BOLD_GREEN + default: str = ANSIColors.GREY + default_value: str = ANSIColors.YELLOW reset: str = ANSIColors.RESET error: str = ANSIColors.BOLD_MAGENTA warning: str = ANSIColors.BOLD_YELLOW @@ -330,7 +331,7 @@ def _safe_getenv(k: str, fallback: str | None = None) -> str | None: try: return os.isatty(file.fileno()) - except io.UnsupportedOperation: + except OSError: return hasattr(file, "isatty") and file.isatty() diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index ff1bdab9fea..3b0debf2ba0 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -31,7 +31,6 @@ import sys import code import warnings -import errno from .readline import _get_reader, multiline_input, append_history_file diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 33907b1fc2a..a5788cdbfae 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -844,14 +844,9 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): if not annotate.__closure__: return None, None - freevars = annotate.__code__.co_freevars new_closure = [] cell_dict = {} - for i, cell in enumerate(annotate.__closure__): - if i < len(freevars): - name = freevars[i] - else: - name = "__cell__" + for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True): cell_dict[name] = cell new_cell = None if allow_evaluation: diff --git a/Lib/argparse.py b/Lib/argparse.py index 6b79747572f..1d550264ae4 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -89,8 +89,8 @@ import os as _os import re as _re import sys as _sys - -from gettext import gettext as _, ngettext +from gettext import gettext as _ +from gettext import ngettext SUPPRESS = '==SUPPRESS==' @@ -166,7 +166,6 @@ def __init__( indent_increment=2, max_help_position=24, width=None, - color=True, ): # default setting for width if width is None: @@ -174,7 +173,6 @@ def __init__( width = 
shutil.get_terminal_size().columns width -= 2 - self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -191,10 +189,12 @@ def __init__( self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') - def _set_color(self, color): + self._set_color(False) + + def _set_color(self, color, *, file=None): from _colorize import can_colorize, decolor, get_theme - if color and can_colorize(): + if color and can_colorize(file=file): self._theme = get_theme(force_color=True).argparse self._decolor = decolor else: @@ -336,27 +336,17 @@ def _format_usage(self, usage, actions, groups, prefix): elif usage is None: prog = '%(prog)s' % dict(prog=self._prog) - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - + parts, pos_start = self._get_actions_usage_parts(actions, groups) # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) + usage = ' '.join(filter(None, [prog, *parts])) # wrap the usage parts if it's too long text_width = self._width - self._current_indent if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - opt_parts = self._get_actions_usage_parts(optionals, groups) - pos_parts = self._get_actions_usage_parts(positionals, groups) + opt_parts = parts[:pos_start] + pos_parts = parts[pos_start:] # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -413,110 +403,114 @@ def get_lines(parts, indent, prefix=None): # prefix with 'usage:' return f'{t.usage}{prefix}{t.reset}{usage}\n\n' - def _format_actions_usage(self, actions, groups): - return ' '.join(self._get_actions_usage_parts(actions, groups)) - def _is_long_option(self, string): return len(string) > 2 def _get_actions_usage_parts(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} + """Get usage parts with split index for optionals/positionals. + + Returns (parts, pos_start) where pos_start is the index in parts + where positionals begin. + This preserves mutually exclusive group formatting across the + optionals/positionals boundary (gh-75949). 
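+
+        For example, a required mutually exclusive group containing the
+        optional '--foo' and the positional 'bar' stays together and is
+        rendered as a single '(--foo | bar)' unit instead of being split
+        at that boundary.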
+ """ + actions = [action for action in actions if action.help is not SUPPRESS] + # group actions by mutually exclusive groups + action_groups = dict.fromkeys(actions) for group in groups: - if not group._group_actions: - raise ValueError(f'empty group {group}') - - if all(action.help is SUPPRESS for action in group._group_actions): - continue - - try: - start = min(actions.index(item) for item in group._group_actions) - except ValueError: - continue - else: - end = start + len(group._group_actions) - if set(actions[start:end]) == set(group._group_actions): - group_actions.update(group._group_actions) - inserts[start, end] = group + for action in group._group_actions: + if action in action_groups: + action_groups[action] = group + # positional arguments keep their position + positionals = [] + for action in actions: + if not action.option_strings: + group = action_groups.pop(action) + if group: + group_actions = [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + [action] + positionals.append((group.required, group_actions)) + else: + positionals.append((None, [action])) + # the remaining optional arguments are sorted by the position of + # the first option in the group + optionals = [] + for action in actions: + if action.option_strings and action in action_groups: + group = action_groups.pop(action) + if group: + group_actions = [action] + [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + optionals.append((group.required, group_actions)) + else: + optionals.append((None, [action])) # collect all actions format strings parts = [] t = self._theme - for action in actions: + pos_start = None + for i, (required, group) in enumerate(optionals + positionals): + start = len(parts) + if i == len(optionals): + pos_start = start + in_group = len(group) > 1 + for action in group: + # produce all arg strings + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + # if it's in a group, strip the outer [] + if in_group: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + part = t.summary_action + part + t.reset - # suppressed arguments are marked with None - if action.help is SUPPRESS: - part = None - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = ( - t.summary_action - + self._format_args(action, default) - + t.reset - ) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] - if self._is_long_option(option_string): - option_color = t.summary_long_option + # produce the first way to invoke the option in brackets else: - option_color = t.summary_short_option + option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = action.format_usage() - part = f"{option_color}{part}{t.reset}" + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = action.format_usage() + part = f"{option_color}{part}{t.reset}" - # if the Optional takes a value, format is: 
- # -s ARGS or --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - part = ( - f"{option_color}{option_string} " - f"{t.summary_label}{args_string}{t.reset}" - ) + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part + # make it look optional if it's not required or in a group + if not (action.required or required or in_group): + part = '[%s]' % part - # add the action string to the list - parts.append(part) + # add the action string to the list + parts.append(part) - # group mutually exclusive actions - inserted_separators_indices = set() - for start, end in sorted(inserts, reverse=True): - group = inserts[start, end] - group_parts = [item for item in parts[start:end] if item is not None] - group_size = len(group_parts) - if group.required: - open, close = "()" if group_size > 1 else ("", "") - else: - open, close = "[]" - group_parts[0] = open + group_parts[0] - group_parts[-1] = group_parts[-1] + close - for i, part in enumerate(group_parts[:-1], start=start): - # insert a separator if not already done in a nested group - if i not in inserted_separators_indices: - parts[i] = part + ' |' - inserted_separators_indices.add(i) - parts[start + group_size - 1] = group_parts[-1] - for i in range(start + group_size, end): - parts[i] = None + if in_group: + parts[start] = ('(' if required else '[') + parts[start] + for i in range(start, len(parts) - 1): + parts[i] += ' |' + parts[-1] += ')' if required else ']' - # return the usage parts - return [item for item in parts if item is not None] + if pos_start is None: + pos_start = len(parts) + return parts, pos_start def _format_text(self, text): if '%(prog)' in text: @@ -748,7 +742,14 @@ def _get_help_string(self, action): if action.default is not SUPPRESS: defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] if action.option_strings or action.nargs in defaulting_nargs: - help += _(' (default: %(default)s)') + t = self._theme + default_str = _(" (default: %(default)s)") + prefix, suffix = default_str.split("%(default)s") + help += ( + f" {t.default}{prefix.lstrip()}" + f"{t.default_value}%(default)s" + f"{t.default}{suffix}{t.reset}" + ) return help @@ -932,15 +933,26 @@ def __init__(self, deprecated=False): _option_strings = [] + neg_option_strings = [] for option_string in option_strings: _option_strings.append(option_string) - if option_string.startswith('--'): - if option_string.startswith('--no-'): + if len(option_string) > 2 and option_string[0] == option_string[1]: + # two-dash long option: '--foo' -> '--no-foo' + if option_string.startswith('no-', 2): raise ValueError(f'invalid option name {option_string!r} ' f'for BooleanOptionalAction') - option_string = '--no-' + option_string[2:] + option_string = option_string[:2] + 'no-' + option_string[2:] _option_strings.append(option_string) + neg_option_strings.append(option_string) + elif len(option_string) > 2 and option_string[0] != option_string[1]: + # single-dash long option: '-foo' -> '-nofoo' + if option_string.startswith('no', 1): + raise ValueError(f'invalid option name {option_string!r} ' + f'for BooleanOptionalAction') + option_string = 
option_string[:1] + 'no' + option_string[1:] + _option_strings.append(option_string) + neg_option_strings.append(option_string) super().__init__( option_strings=_option_strings, @@ -950,11 +962,12 @@ def __init__(self, required=required, help=help, deprecated=deprecated) + self.neg_option_strings = neg_option_strings def __call__(self, parser, namespace, values, option_string=None): if option_string in self.option_strings: - setattr(namespace, self.dest, not option_string.startswith('--no-')) + setattr(namespace, self.dest, option_string not in self.neg_option_strings) def format_usage(self): return ' | '.join(self.option_strings) @@ -1551,8 +1564,8 @@ def add_argument(self, *args, **kwargs): f'instance of it must be passed') # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - formatter = self._get_formatter() + if hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: formatter._format_args(action, None) except TypeError: @@ -1660,29 +1673,35 @@ def _get_positional_kwargs(self, dest, **kwargs): def _get_optional_kwargs(self, *args, **kwargs): # determine short and long option strings option_strings = [] - long_option_strings = [] for option_string in args: # error on strings that don't start with an appropriate prefix - if not option_string[0] in self.prefix_chars: + if option_string[0] not in self.prefix_chars: raise ValueError( f'invalid option string {option_string!r}: ' f'must start with a character {self.prefix_chars!r}') - - # strings starting with two prefix characters are long options option_strings.append(option_string) - if len(option_string) > 1 and option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' dest = kwargs.pop('dest', None) if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) + priority = 0 + for option_string in option_strings: + if len(option_string) <= 2: + # short option: '-x' -> 'x' + if priority < 1: + dest = option_string.lstrip(self.prefix_chars) + priority = 1 + elif option_string[1] not in self.prefix_chars: + # single-dash long option: '-foo' -> 'foo' + if priority < 2: + dest = option_string.lstrip(self.prefix_chars) + priority = 2 + else: + # two-dash long option: '--foo' -> 'foo' + dest = option_string.lstrip(self.prefix_chars) + break if not dest: - msg = f'dest= is required for options like {option_string!r}' + msg = f'dest= is required for options like {repr(option_strings)[1:-1]}' raise TypeError(msg) dest = dest.replace('-', '_') @@ -1740,8 +1759,8 @@ def _handle_conflict_resolve(self, action, conflicting_actions): action.container._remove_action(action) def _check_help(self, action): - if action.help and hasattr(self, "_get_formatter"): - formatter = self._get_formatter() + if action.help and hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: formatter._expand_help(action) except (ValueError, TypeError, KeyError) as exc: @@ -1896,6 +1915,9 @@ def __init__(self, self.suggest_on_error = suggest_on_error self.color = color + # Cached formatter for validation (avoids repeated _set_color calls) + self._cached_formatter = None + add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) self._optionals = add_group(_('options')) @@ -1957,14 +1979,16 @@ def 
add_subparsers(self, **kwargs): self._subparsers = self._positionals # prog defaults to the usage message of this parser, skipping - # optional arguments and with no "usage:" prefix + # non-required optional arguments and with no "usage:" prefix if kwargs.get('prog') is None: # Create formatter without color to avoid storing ANSI codes in prog formatter = self.formatter_class(prog=self.prog) formatter._set_color(False) positionals = self._get_positional_actions() + required_optionals = [action for action in self._get_optional_actions() + if action.required] groups = self._mutually_exclusive_groups - formatter.add_usage(None, positionals, groups, '') + formatter.add_usage(None, required_optionals + positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list @@ -2431,7 +2455,7 @@ def _parse_optional(self, arg_string): return None # if it doesn't start with a prefix, it was meant to be positional - if not arg_string[0] in self.prefix_chars: + if arg_string[0] not in self.prefix_chars: return None # if the option string is present in the parser, return the action @@ -2693,14 +2717,16 @@ def _check_value(self, action, value): # Help-formatting methods # ======================= - def format_usage(self): - formatter = self._get_formatter() + def format_usage(self, formatter=None): + if formatter is None: + formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, self._mutually_exclusive_groups) return formatter.format_help() - def format_help(self): - formatter = self._get_formatter() + def format_help(self, formatter=None): + if formatter is None: + formatter = self._get_formatter() # usage formatter.add_usage(self.usage, self._actions, @@ -2722,11 +2748,18 @@ def format_help(self): # determine help from format above return formatter.format_help() - def _get_formatter(self): + def _get_formatter(self, file=None): formatter = self.formatter_class(prog=self.prog) - formatter._set_color(self.color) + formatter._set_color(self.color, file=file) return formatter + def _get_validation_formatter(self): + # Return cached formatter for read-only validation operations + # (_expand_help and _format_args). Avoids repeated slow _set_color calls. + if self._cached_formatter is None: + self._cached_formatter = self._get_formatter() + return self._cached_formatter + # ===================== # Help-printing methods # ===================== @@ -2734,12 +2767,26 @@ def _get_formatter(self): def print_usage(self, file=None): if file is None: file = _sys.stdout - self._print_message(self.format_usage(), file) + formatter = self._get_formatter(file=file) + try: + usage_text = self.format_usage(formatter=formatter) + except TypeError: + # Backward compatibility for formatter classes that + # do not accept the 'formatter' keyword argument. + usage_text = self.format_usage() + self._print_message(usage_text, file) def print_help(self, file=None): if file is None: file = _sys.stdout - self._print_message(self.format_help(), file) + formatter = self._get_formatter(file=file) + try: + help_text = self.format_help(formatter=formatter) + except TypeError: + # Backward compatibility for formatter classes that + # do not accept the 'formatter' keyword argument. 
+ help_text = self.format_help() + self._print_message(help_text, file) def _print_message(self, message, file=None): if message: diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index 6bd00a64478..29652295218 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -389,7 +389,7 @@ def _set_state(future, other): def _call_check_cancel(destination): if destination.cancelled(): - if source_loop is None or source_loop is dest_loop: + if source_loop is None or source_loop is events._get_running_loop(): source.cancel() else: source_loop.call_soon_threadsafe(source.cancel) @@ -398,7 +398,7 @@ def _call_set_state(source): if (destination.cancelled() and dest_loop is not None and dest_loop.is_closed()): return - if dest_loop is None or dest_loop is source_loop: + if dest_loop is None or dest_loop is events._get_running_loop(): _set_state(destination, source) else: if dest_loop.is_closed(): diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py index 59e22f523a8..d2db1a930c2 100644 --- a/Lib/asyncio/streams.py +++ b/Lib/asyncio/streams.py @@ -667,8 +667,7 @@ async def readuntil(self, separator=b'\n'): # adds data which makes separator be found. That's why we check for # EOF *after* inspecting the buffer. if self._eof: - chunk = bytes(self._buffer) - self._buffer.clear() + chunk = self._buffer.take_bytes() raise exceptions.IncompleteReadError(chunk, None) # _wait_for_data() will resume reading if stream was paused. @@ -678,10 +677,9 @@ async def readuntil(self, separator=b'\n'): raise exceptions.LimitOverrunError( 'Separator is found, but chunk is longer than limit', match_start) - chunk = self._buffer[:match_end] - del self._buffer[:match_end] + chunk = self._buffer.take_bytes(match_end) self._maybe_resume_transport() - return bytes(chunk) + return chunk async def read(self, n=-1): """Read up to `n` bytes from the stream. @@ -716,20 +714,16 @@ async def read(self, n=-1): # collect everything in self._buffer, but that would # deadlock if the subprocess sends more than self.limit # bytes. So just call self.read(self._limit) until EOF. 
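+        # bytearray.take_bytes(), introduced alongside this change, empties
+        # the buffer and returns its contents as bytes; it is used below to
+        # avoid the extra copy that bytes(buffer) would make (semantics
+        # assumed from its other uses in this patch).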
- blocks = [] - while True: - block = await self.read(self._limit) - if not block: - break - blocks.append(block) - return b''.join(blocks) + joined = bytearray() + while block := await self.read(self._limit): + joined += block + return joined.take_bytes() if not self._buffer and not self._eof: await self._wait_for_data('read') # This will work right even if buffer is less than n bytes - data = bytes(memoryview(self._buffer)[:n]) - del self._buffer[:n] + data = self._buffer.take_bytes(min(len(self._buffer), n)) self._maybe_resume_transport() return data @@ -760,18 +754,12 @@ async def readexactly(self, n): while len(self._buffer) < n: if self._eof: - incomplete = bytes(self._buffer) - self._buffer.clear() + incomplete = self._buffer.take_bytes() raise exceptions.IncompleteReadError(incomplete, n) await self._wait_for_data('readexactly') - if len(self._buffer) == n: - data = bytes(self._buffer) - self._buffer.clear() - else: - data = bytes(memoryview(self._buffer)[:n]) - del self._buffer[:n] + data = self._buffer.take_bytes(n) self._maybe_resume_transport() return data diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index f39e11fdd51..1d463ea09ba 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,6 +1,6 @@ """Tools to analyze tasks running in asyncio programs.""" -from collections import defaultdict, namedtuple +from collections import defaultdict from itertools import count from enum import Enum import sys diff --git a/Lib/base64.py b/Lib/base64.py index f95132a4274..341bf8eaf18 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -193,7 +193,7 @@ def _b32encode(alphabet, s): encoded[-3:] = b'===' elif leftover == 4: encoded[-1:] = b'=' - return bytes(encoded) + return encoded.take_bytes() def _b32decode(alphabet, s, casefold=False, map01=None): # Delay the initialization of the table to not waste memory @@ -238,7 +238,7 @@ def _b32decode(alphabet, s, casefold=False, map01=None): last = acc.to_bytes(5) # big endian leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1 decoded[-5:] = last[:leftover] - return bytes(decoded) + return decoded.take_bytes() def b32encode(s): diff --git a/Lib/cProfile.py b/Lib/cProfile.py index 4af82f2cb8c..cc6255f61ae 100644 --- a/Lib/cProfile.py +++ b/Lib/cProfile.py @@ -9,6 +9,5 @@ __all__ = ["run", "runctx", "Profile"] if __name__ == "__main__": - import sys from profiling.tracing.__main__ import main main() diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 25ac4d1d524..55ffc36ea5b 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1542,6 +1542,8 @@ def format_map(self, mapping): return self.data.format_map(mapping) def index(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.index(sub, start, end) def isalpha(self): @@ -1610,6 +1612,8 @@ def rfind(self, sub, start=0, end=_sys.maxsize): return self.data.rfind(sub, start, end) def rindex(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.rindex(sub, start, end) def rjust(self, width, *args): diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 3ccb7246928..730ced72998 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -550,7 +550,12 @@ def __annotate__(format, /): new_annotations = {} for k in annotation_fields: - new_annotations[k] = cls_annotations[k] + # gh-142214: The annotation may be missing in unusual dynamic cases. + # If so, just skip it. 
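+            # (One hypothetical way this can happen: code that removes keys
+            # from the class's __annotations__ after the dataclass machinery
+            # has recorded its fields.)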
+ try: + new_annotations[k] = cls_annotations[k] + except KeyError: + pass if return_type is not MISSING: if format == Format.STRING: @@ -1399,9 +1404,10 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): f.type = ann # Fix the class reference in the __annotate__ method - init_annotate = newcls.__init__.__annotate__ - if getattr(init_annotate, "__generated_by_dataclasses__", False): - _update_func_cell_for__class__(init_annotate, cls, newcls) + init = newcls.__init__ + if init_annotate := getattr(init, "__annotate__", None): + if getattr(init_annotate, "__generated_by_dataclasses__", False): + _update_func_cell_for__class__(init_annotate, cls, newcls) return newcls diff --git a/Lib/doctest.py b/Lib/doctest.py index ad8fb900f69..0fcfa1e3e97 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -1167,6 +1167,32 @@ def _find_lineno(self, obj, source_lines): if pat.match(source_lines[lineno]): return lineno + # Handle __test__ string doctests formatted as triple-quoted + # strings. Find a non-blank line in the test string and match it + # in the source, verifying subsequent lines also match to handle + # duplicate lines. + if isinstance(obj, str) and source_lines is not None: + obj_lines = obj.splitlines(keepends=True) + # Skip the first line (may be on same line as opening quotes) + # and any blank lines to find a meaningful line to match. + start_index = 1 + while (start_index < len(obj_lines) + and not obj_lines[start_index].strip()): + start_index += 1 + if start_index < len(obj_lines): + target_line = obj_lines[start_index] + for lineno, source_line in enumerate(source_lines): + if source_line == target_line: + # Verify subsequent lines also match + for i in range(start_index + 1, len(obj_lines) - 1): + source_idx = lineno + i - start_index + if source_idx >= len(source_lines): + break + if obj_lines[i] != source_lines[source_idx]: + break + else: + return lineno - start_index + # We couldn't find the line number. return None diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index c7f665b3990..cbff9694742 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2792,6 +2792,9 @@ def _steal_trailing_WSP_if_exists(lines): if lines and lines[-1] and lines[-1][-1] in WSP: wsp = lines[-1][-1] lines[-1] = lines[-1][:-1] + # gh-142006: if the line is now empty, remove it entirely. + if not lines[-1]: + lines.pop() return wsp def _refold_parse_tree(parse_tree, *, policy): diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 6479b9bab7a..ae8ef32792b 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -504,10 +504,9 @@ def _parse_headers(self, lines): self._input.unreadline(line) return else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. + # Weirdly placed unix-from line. defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue # Split the line on the colon separating field name from value. # There will always be a colon, because if there wasn't the part of @@ -519,7 +518,7 @@ def _parse_headers(self, lines): # message. Track the error but keep going. 
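The `feedparser` change that begins here (and continues below) routes header defects through `self.policy.handle_defect()` instead of appending to `self._cur.defects` directly. The visible difference is for policies configured with `raise_on_defect=True`, which with this change turn these malformed-header conditions into exceptions. A sketch of both behaviors:

```python
import email
from email import errors, policy

# Header block with a misplaced Unix "From " envelope line in the middle.
raw = "Subject: hi\nFrom ceo@example.com Sat Jan  1 00:00:00 2000\n\nbody\n"

# Default policy: the defect is recorded on the message, parsing continues.
msg = email.message_from_string(raw, policy=policy.default)
print([type(d).__name__ for d in msg.defects])
# e.g. ['MisplacedEnvelopeHeaderDefect']

# Strict policy: handle_defect() now raises instead of recording.
try:
    email.message_from_string(raw, policy=policy.strict)
except errors.MessageDefect as exc:
    print("raised:", type(exc).__name__)
```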
if i == 0: defect = errors.InvalidHeaderDefect("Missing header name.") - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue assert i>0, "_parse_headers fed line with no : and no leading WS" diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 0c90b4c9fe1..d31ee07ab45 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -226,7 +226,8 @@ def encode(self, input, errors='strict'): offset + exc.end, exc.reason, ) - return bytes(result+trailing_dot), len(input) + result += trailing_dot + return result.take_bytes(), len(input) def decode(self, input, errors='strict'): @@ -311,7 +312,7 @@ def _buffer_encode(self, input, errors, final): result += trailing_dot size += len(trailing_dot) - return (bytes(result), size) + return (result.take_bytes(), size) class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index 4622fc8c920..268fccbd539 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -17,7 +17,7 @@ def segregate(str): else: extended.add(c) extended = sorted(extended) - return bytes(base), extended + return base.take_bytes(), extended def selective_len(str, max): """Return the length of str, considering only characters below max.""" @@ -83,7 +83,7 @@ def generate_generalized_integer(N, bias): t = T(j, bias) if N < t: result.append(digits[N]) - return bytes(result) + return result.take_bytes() result.append(digits[t + ((N - t) % (36 - t))]) N = (N - t) // (36 - t) j += 1 @@ -112,7 +112,7 @@ def generate_integers(baselen, deltas): s = generate_generalized_integer(delta, bias) result.extend(s) bias = adapt(delta, points==0, baselen+points+1) - return bytes(result) + return result.take_bytes() def punycode_encode(text): base, extended = segregate(text) diff --git a/Lib/html/parser.py b/Lib/html/parser.py index e50620de800..80fb8c3f929 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -24,6 +24,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +incomplete_charref = re.compile('&#(?:[0-9]|[xX][0-9a-fA-F])') attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') @@ -304,10 +305,20 @@ def goahead(self, end): k = k - 1 i = self.updatepos(i, k) continue + match = incomplete_charref.match(rawdata, i) + if match: + if end: + self.handle_charref(rawdata[i+2:]) + i = self.updatepos(i, n) + break + # incomplete + break + elif i + 3 < n: # larger than "&#x" + # not the end of the buffer, and can't be confused + # with some other construct + self.handle_data("&#") + i = self.updatepos(i, i + 2) else: - if ";" in rawdata[i:]: # bail by consuming &# - self.handle_data(rawdata[i:i+2]) - i = self.updatepos(i, i+2) break elif startswith('&', i): match = entityref.match(rawdata, i) @@ -321,15 +332,13 @@ def goahead(self, end): continue match = incomplete.match(rawdata, i) if match: - # match.group() will contain at least 2 chars - if end and match.group() == rawdata[i:]: - k = match.end() - if k <= i: - k = n - i = self.updatepos(i, i + 1) + if end: + self.handle_entityref(rawdata[i+1:]) + i = self.updatepos(i, n) + break # incomplete break - elif (i + 1) < n: + elif i + 1 < n: # not the end of the buffer, and can't be confused # with some other construct self.handle_data("&") diff --git a/Lib/http/client.py b/Lib/http/client.py index 
4b9a61cfc11..73c3256734a 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -111,6 +111,11 @@
 _MAXLINE = 65536
 _MAXHEADERS = 100
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
+_MIN_READ_BUF_SIZE = 1 << 20
+
+
 # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
 #
 # VCHAR          = %x21-7E
@@ -642,10 +647,25 @@ def _safe_read(self, amt):
         reading.  If the bytes are truly not available (due to EOF), then the
         IncompleteRead exception can be used to detect the problem.
         """
-        data = self.fp.read(amt)
-        if len(data) < amt:
-            raise IncompleteRead(data, amt-len(data))
-        return data
+        cursize = min(amt, _MIN_READ_BUF_SIZE)
+        data = self.fp.read(cursize)
+        if len(data) >= amt:
+            return data
+        if len(data) < cursize:
+            raise IncompleteRead(data, amt - len(data))
+
+        data = io.BytesIO(data)
+        data.seek(0, 2)
+        while True:
+            # This is a geometric increase in read size (never more than
+            # doubling the current length of data per loop iteration).
+            delta = min(cursize, amt - cursize)
+            data.write(self.fp.read(delta))
+            if data.tell() >= amt:
+                return data.getvalue()
+            cursize += delta
+            if data.tell() < cursize:
+                raise IncompleteRead(data.getvalue(), amt - data.tell())
 
     def _safe_readinto(self, b):
         """Same as _safe_read, but for reading into a buffer."""
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
index 192c0261408..2f9307cba4f 100644
--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -208,12 +208,8 @@ def _write_atomic(path, data, mode=0o666):
     try:
         # We first write data to a temporary file, and then use os.replace() to
         # perform an atomic rename.
-        with _io.FileIO(fd, 'wb') as file:
-            bytes_written = file.write(data)
-            if bytes_written != len(data):
-                # Raise an OSError so the 'except' below cleans up the partially
-                # written file.
-                raise OSError("os.write() didn't write the full pyc file")
+        with _io.open(fd, 'wb') as file:
+            file.write(data)
         _os.replace(path_tmp, path)
     except OSError:
         try:
diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py
index 5cf6d64f3ea..4c70e8b75ed 100644
--- a/Lib/json/encoder.py
+++ b/Lib/json/encoder.py
@@ -264,17 +264,6 @@ def floatstr(o, allow_nan=self.allow_nan,
 
 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
         _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
-        ## HACK: hand-optimized bytecode; turn globals into locals
-        ValueError=ValueError,
-        dict=dict,
-        float=float,
-        id=id,
-        int=int,
-        isinstance=isinstance,
-        list=list,
-        str=str,
-        tuple=tuple,
-        _intstr=int.__repr__,
     ):
 
     def _iterencode_list(lst, _current_indent_level):
@@ -311,7 +300,7 @@ def _iterencode_list(lst, _current_indent_level):
                 # Subclasses of int/float may override __repr__, but we still
                 # want to encode them as integers/floats in JSON. One example
                 # within the standard library is IntEnum.
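The `_safe_read()` rewrite above bounds the initial allocation and then grows the per-read size geometrically, so a malicious or corrupt `Content-Length` can no longer force a giant up-front buffer. A standalone sketch of the same strategy (names borrowed from the patch; `fp` is any binary file object):

```python
import io

_MIN_READ_BUF_SIZE = 1 << 20  # 1 MiB initial cap, as in the patch

def read_exactly(fp, amt):
    """Read exactly amt bytes, growing the per-read size geometrically."""
    cursize = min(amt, _MIN_READ_BUF_SIZE)
    data = fp.read(cursize)
    if len(data) >= amt:
        return data
    if len(data) < cursize:
        raise EOFError(f"wanted {amt} bytes, got {len(data)}")
    buf = io.BytesIO(data)
    buf.seek(0, 2)  # append from the end
    while True:
        # Never read more than we already hold, so each iteration at
        # most doubles the buffer instead of allocating amt at once.
        delta = min(cursize, amt - cursize)
        buf.write(fp.read(delta))
        if buf.tell() >= amt:
            return buf.getvalue()
        cursize += delta
        if buf.tell() < cursize:
            raise EOFError(f"wanted {amt} bytes, got {buf.tell()}")

assert len(read_exactly(io.BytesIO(b"x" * 100), 100)) == 100
```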
- yield buf + _intstr(value) + yield buf + int.__repr__(value) elif isinstance(value, float): # see comment above for int yield buf + _floatstr(value) @@ -374,7 +363,7 @@ def _iterencode_dict(dct, _current_indent_level): key = 'null' elif isinstance(key, int): # see comment for int/float in _make_iterencode - key = _intstr(key) + key = int.__repr__(key) elif _skipkeys: continue else: @@ -399,7 +388,7 @@ def _iterencode_dict(dct, _current_indent_level): yield 'false' elif isinstance(value, int): # see comment for int/float in _make_iterencode - yield _intstr(value) + yield int.__repr__(value) elif isinstance(value, float): # see comment for int/float in _make_iterencode yield _floatstr(value) @@ -434,7 +423,7 @@ def _iterencode(o, _current_indent_level): yield 'false' elif isinstance(o, int): # see comment for int/float in _make_iterencode - yield _intstr(o) + yield int.__repr__(o) elif isinstance(o, float): # see comment for int/float in _make_iterencode yield _floatstr(o) @@ -458,4 +447,13 @@ def _iterencode(o, _current_indent_level): raise if markers is not None: del markers[markerid] - return _iterencode + + def _iterencode_once(o, _current_indent_level): + nonlocal _iterencode, _iterencode_dict, _iterencode_list + try: + yield from _iterencode(o, _current_indent_level) + finally: + # Break reference cycles due to mutually recursive closures: + del _iterencode, _iterencode_dict, _iterencode_list + + return _iterencode_once diff --git a/Lib/multiprocessing/queues.py b/Lib/multiprocessing/queues.py index 925f0439000..981599acf5e 100644 --- a/Lib/multiprocessing/queues.py +++ b/Lib/multiprocessing/queues.py @@ -121,7 +121,7 @@ def get(self, block=True, timeout=None): def qsize(self): # Raises NotImplementedError on Mac OSX because of broken sem_getvalue() - return self._maxsize - self._sem._semlock._get_value() + return self._maxsize - self._sem.get_value() def empty(self): return not self._poll() diff --git a/Lib/multiprocessing/resource_tracker.py b/Lib/multiprocessing/resource_tracker.py index b0f9099f4a5..3606d1effb4 100644 --- a/Lib/multiprocessing/resource_tracker.py +++ b/Lib/multiprocessing/resource_tracker.py @@ -68,6 +68,13 @@ def __init__(self): self._exitcode = None self._reentrant_messages = deque() + # True to use colon-separated lines, rather than JSON lines, + # for internal communication. (Mainly for testing). + # Filenames not supported by the simple format will always be sent + # using JSON. + # The reader should understand all formats. + self._use_simple_format = False + def _reentrant_call_error(self): # gh-109629: this happens if an explicit call to the ResourceTracker # gets interrupted by a garbage collection, invoking a finalizer (*) @@ -200,7 +207,9 @@ def _launch(self): os.close(r) def _make_probe_message(self): - """Return a JSON-encoded probe message.""" + """Return a probe message.""" + if self._use_simple_format: + return b'PROBE:0:noop\n' return ( json.dumps( {"cmd": "PROBE", "rtype": "noop"}, @@ -267,6 +276,15 @@ def _write(self, msg): assert nbytes == len(msg), f"{nbytes=} != {len(msg)=}" def _send(self, cmd, name, rtype): + if self._use_simple_format and '\n' not in name: + msg = f"{cmd}:{name}:{rtype}\n".encode("ascii") + if len(msg) > 512: + # posix guarantees that writes to a pipe of less than PIPE_BUF + # bytes are atomic, and that PIPE_BUF >= 512 + raise ValueError('msg too long') + self._ensure_running_and_write(msg) + return + # POSIX guarantees that writes to a pipe of less than PIPE_BUF (512 on Linux) # bytes are atomic. 
Therefore, we want the message to be shorter than 512 bytes. # POSIX shm_open() and sem_open() require the name, including its leading slash, @@ -286,6 +304,7 @@ def _send(self, cmd, name, rtype): # The entire JSON message is guaranteed < PIPE_BUF (512 bytes) by construction. assert len(msg) <= 512, f"internal error: message too long ({len(msg)} bytes)" + assert msg.startswith(b'{') self._ensure_running_and_write(msg) @@ -296,6 +315,30 @@ def _send(self, cmd, name, rtype): getfd = _resource_tracker.getfd +def _decode_message(line): + if line.startswith(b'{'): + try: + obj = json.loads(line.decode('ascii')) + except Exception as e: + raise ValueError("malformed resource_tracker message: %r" % (line,)) from e + + cmd = obj["cmd"] + rtype = obj["rtype"] + b64 = obj.get("base64_name", "") + + if not isinstance(cmd, str) or not isinstance(rtype, str) or not isinstance(b64, str): + raise ValueError("malformed resource_tracker fields: %r" % (obj,)) + + try: + name = base64.urlsafe_b64decode(b64).decode('utf-8', 'surrogateescape') + except ValueError as e: + raise ValueError("malformed resource_tracker base64_name: %r" % (b64,)) from e + else: + cmd, rest = line.strip().decode('ascii').split(':', maxsplit=1) + name, rtype = rest.rsplit(':', maxsplit=1) + return cmd, rtype, name + + def main(fd): '''Run resource tracker.''' # protect the process from ^C and "killall python" etc @@ -318,23 +361,7 @@ def main(fd): with open(fd, 'rb') as f: for line in f: try: - try: - obj = json.loads(line.decode('ascii')) - except Exception as e: - raise ValueError("malformed resource_tracker message: %r" % (line,)) from e - - cmd = obj["cmd"] - rtype = obj["rtype"] - b64 = obj.get("base64_name", "") - - if not isinstance(cmd, str) or not isinstance(rtype, str) or not isinstance(b64, str): - raise ValueError("malformed resource_tracker fields: %r" % (obj,)) - - try: - name = base64.urlsafe_b64decode(b64).decode('utf-8', 'surrogateescape') - except ValueError as e: - raise ValueError("malformed resource_tracker base64_name: %r" % (b64,)) from e - + cmd, rtype, name = _decode_message(line) cleanup_func = _CLEANUP_FUNCS.get(rtype, None) if cleanup_func is None: raise ValueError( diff --git a/Lib/multiprocessing/synchronize.py b/Lib/multiprocessing/synchronize.py index 30425047e98..9188114ae28 100644 --- a/Lib/multiprocessing/synchronize.py +++ b/Lib/multiprocessing/synchronize.py @@ -135,11 +135,16 @@ def __init__(self, value=1, *, ctx): SemLock.__init__(self, SEMAPHORE, value, SEM_VALUE_MAX, ctx=ctx) def get_value(self): + '''Returns current value of Semaphore. + + Raises NotImplementedError on Mac OSX + because of broken sem_getvalue(). 
+ ''' return self._semlock._get_value() def __repr__(self): try: - value = self._semlock._get_value() + value = self.get_value() except Exception: value = 'unknown' return '<%s(value=%s)>' % (self.__class__.__name__, value) @@ -155,7 +160,7 @@ def __init__(self, value=1, *, ctx): def __repr__(self): try: - value = self._semlock._get_value() + value = self.get_value() except Exception: value = 'unknown' return '<%s(value=%s, maxvalue=%s)>' % \ @@ -247,8 +252,8 @@ def _make_methods(self): def __repr__(self): try: - num_waiters = (self._sleeping_count._semlock._get_value() - - self._woken_count._semlock._get_value()) + num_waiters = (self._sleeping_count.get_value() - + self._woken_count.get_value()) except Exception: num_waiters = 'unknown' return '<%s(%s, %s)>' % (self.__class__.__name__, self._lock, num_waiters) diff --git a/Lib/pdb.py b/Lib/pdb.py index 76bb28d7396..1506e3d4709 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -130,7 +130,7 @@ def find_first_executable_line(code): return code.co_firstlineno def find_function(funcname, filename): - cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) + cre = re.compile(r'(?:async\s+)?def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: @@ -346,8 +346,8 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, bdb.Bdb.__init__(self, skip=skip, backend=backend if backend else get_default_backend()) cmd.Cmd.__init__(self, completekey, stdin, stdout) sys.audit("pdb.Pdb") - if stdout: - self.use_rawinput = 0 + if stdin: + self.use_rawinput = False self.prompt = '(Pdb) ' self.aliases = {} self.displaying = {} @@ -654,7 +654,7 @@ def _show_display(self): def _get_tb_and_exceptions(self, tb_or_exc): """ - Given a tracecack or an exception, return a tuple of chained exceptions + Given a traceback or an exception, return a tuple of chained exceptions and current traceback to inspect. This will deal with selecting the right ``__cause__`` or ``__context__`` @@ -1487,7 +1487,9 @@ def lineinfo(self, identifier): f = self.lookupmodule(parts[0]) if f: fname = f - item = parts[1] + item = parts[1] + else: + return failed answer = find_function(item, self.canonic(fname)) return answer or failed @@ -2429,7 +2431,9 @@ def print_stack_trace(self, count=None): except KeyboardInterrupt: pass - def print_stack_entry(self, frame_lineno, prompt_prefix=line_prefix): + def print_stack_entry(self, frame_lineno, prompt_prefix=None): + if prompt_prefix is None: + prompt_prefix = line_prefix frame, lineno = frame_lineno if frame is self.curframe: prefix = '> ' diff --git a/Lib/pickle.py b/Lib/pickle.py index 729c215514a..f3025776623 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -189,6 +189,11 @@ def __init__(self, value): __all__.extend(x for x in dir() if x.isupper() and not x.startswith('_')) +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. 
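Among the pdb changes above, `find_function()` now recognizes coroutines: the compiled pattern allows an optional `async ` prefix (and, as before, a PEP 695 type-parameter list) ahead of the opening parenthesis. The regex can be checked in isolation:

```python
import re

funcname = "fetch"  # hypothetical target name
cre = re.compile(r'(?:async\s+)?def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname))

assert cre.match("def fetch(url):")
assert cre.match("async def fetch(url):")          # new: coroutines match
assert cre.match("def fetch[T](items: list[T]):")  # PEP 695 generics still match
assert not cre.match("def fetch_all(urls):")       # no false prefix matches
```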
+_MIN_READ_BUF_SIZE = (1 << 20)
+
+
 class _Framer:
 
     _FRAME_SIZE_MIN = 4
@@ -287,7 +292,7 @@ def read(self, n):
                     "pickle exhausted before end of frame")
             return data
         else:
-            return self.file_read(n)
+            return self._chunked_file_read(n)
 
     def readline(self):
         if self.current_frame:
@@ -302,11 +307,23 @@ def readline(self):
         else:
             return self.file_readline()
 
+    def _chunked_file_read(self, size):
+        cursize = min(size, _MIN_READ_BUF_SIZE)
+        b = self.file_read(cursize)
+        while cursize < size and len(b) == cursize:
+            delta = min(cursize, size - cursize)
+            b += self.file_read(delta)
+            cursize += delta
+        return b
+
     def load_frame(self, frame_size):
         if self.current_frame and self.current_frame.read() != b'':
             raise UnpicklingError(
                 "beginning of a new frame before end of current frame")
-        self.current_frame = io.BytesIO(self.file_read(frame_size))
+        data = self._chunked_file_read(frame_size)
+        if len(data) < frame_size:
+            raise EOFError
+        self.current_frame = io.BytesIO(data)
 
 
 # Tools used for pickling.
@@ -1496,12 +1513,17 @@ def load_binbytes8(self):
     dispatch[BINBYTES8[0]] = load_binbytes8
 
     def load_bytearray8(self):
-        len, = unpack('<Q', self.read(8))
-        if len > maxsize:
+        size, = unpack('<Q', self.read(8))
+        if size > maxsize:
             raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                                   "of %d bytes" % maxsize)
-        b = bytearray(len)
-        self.readinto(b)
+        cursize = min(size, _MIN_READ_BUF_SIZE)
+        b = bytearray(cursize)
+        if self.readinto(b) == cursize:
+            while cursize < size and len(b) == cursize:
+                delta = min(cursize, size - cursize)
+                b += self.read(delta)
+                cursize += delta
         self.append(b)
     dispatch[BYTEARRAY8[0]] = load_bytearray8
 
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index 254b6c7fcc9..29baf3be7eb 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -2839,7 +2839,7 @@ def __init__(self, value):
 }
 
 
-if __name__ == "__main__":
+def _main(args=None):
     import argparse
     parser = argparse.ArgumentParser(
         description='disassemble one or more pickle files',
@@ -2864,7 +2864,7 @@ def __init__(self, value):
         '-p', '--preamble', default="==> {name} <==",
         help='if more than one pickle file is specified, print this before'
         ' each disassembly')
-    args = parser.parse_args()
+    args = parser.parse_args(args)
     annotate = 30 if args.annotate else 0
     memo = {} if args.memo else None
     if args.output is None:
@@ -2885,3 +2885,7 @@ def __init__(self, value):
     finally:
         if output is not sys.stdout:
             output.close()
+
+
+if __name__ == "__main__":
+    _main()
diff --git a/Lib/platform.py b/Lib/platform.py
index 4db93bea2a3..b5017dbdb02 100644
--- a/Lib/platform.py
+++ b/Lib/platform.py
@@ -197,7 +197,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384):
         | (GLIBC_([0-9.]+))
         | (libc(_\w+)?\.so(?:\.(\d[0-9.]*))?)
         | (musl-([0-9.]+))
-        | (libc.musl(?:-\w+)?.so(?:\.(\d[0-9.]*))?)
+        | ((?:libc\.|ld-)musl(?:-\w+)?.so(?:\.(\d[0-9.]*))?)
         """,
         re.ASCII | re.VERBOSE)
@@ -236,7 +236,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384):
             elif V(glibcversion) > V(ver):
                 ver = glibcversion
     elif so:
-        if lib != 'glibc':
+        if lib not in ('glibc', 'musl'):
             lib = 'libc'
         if soversion and (not ver or V(soversion) > V(ver)):
             ver = soversion
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index 67e832db217..655c51eea3d 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -73,6 +73,9 @@
 PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__)
 globals().update(PlistFormat.__members__)
 
+# Data larger than this will be read in chunks, to prevent extreme
+# overallocation.
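The `pickletools` change above converts the script entry point into a reusable `_main(args=None)` function, so the disassembler can be driven programmatically as well as via `python -m pickletools`. A hypothetical use under that assumption (`--annotate` is part of the existing CLI, per the `args.annotate` reference in the hunk; the file name is made up):

```python
import pickle
import pickletools

# Write a small sample pickle to disassemble.
with open("sample.pickle", "wb") as f:
    pickle.dump({"spam": 1, "eggs": 2}, f)

# Equivalent to: python -m pickletools --annotate sample.pickle
pickletools._main(["--annotate", "sample.pickle"])
```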
+_MIN_READ_BUF_SIZE = 1 << 20 class UID: def __init__(self, data): @@ -508,12 +511,24 @@ def _get_size(self, tokenL): return tokenL + def _read(self, size): + cursize = min(size, _MIN_READ_BUF_SIZE) + data = self._fp.read(cursize) + while True: + if len(data) != cursize: + raise InvalidFileException + if cursize == size: + return data + delta = min(cursize, size - cursize) + data += self._fp.read(delta) + cursize += delta + def _read_ints(self, n, size): - data = self._fp.read(size * n) + data = self._read(size * n) if size in _BINARY_FORMAT: return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data) else: - if not size or len(data) != size * n: + if not size: raise InvalidFileException() return tuple(int.from_bytes(data[i: i + size], 'big') for i in range(0, size * n, size)) @@ -573,22 +588,16 @@ def _read_object(self, ref): elif tokenH == 0x40: # data s = self._get_size(tokenL) - result = self._fp.read(s) - if len(result) != s: - raise InvalidFileException() + result = self._read(s) elif tokenH == 0x50: # ascii string s = self._get_size(tokenL) - data = self._fp.read(s) - if len(data) != s: - raise InvalidFileException() + data = self._read(s) result = data.decode('ascii') elif tokenH == 0x60: # unicode string s = self._get_size(tokenL) * 2 - data = self._fp.read(s) - if len(data) != s: - raise InvalidFileException() + data = self._read(s) result = data.decode('utf-16be') elif tokenH == 0x80: # UID diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py index b493c6aa7eb..6a0bb5e5c2f 100644 --- a/Lib/profiling/sampling/__init__.py +++ b/Lib/profiling/sampling/__init__.py @@ -7,7 +7,8 @@ from .collector import Collector from .pstats_collector import PstatsCollector from .stack_collector import CollapsedStackCollector +from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector from .string_table import StringTable -__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "GeckoCollector", "StringTable") +__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable") diff --git a/Lib/profiling/sampling/__main__.py b/Lib/profiling/sampling/__main__.py index cd1425b8b9c..47bd3a0113e 100644 --- a/Lib/profiling/sampling/__main__.py +++ b/Lib/profiling/sampling/__main__.py @@ -45,7 +45,7 @@ system restrictions or missing privileges. """ -from .sample import main +from .cli import main def handle_permission_error(): """Handle PermissionError by displaying appropriate error message.""" diff --git a/Lib/profiling/sampling/_css_utils.py b/Lib/profiling/sampling/_css_utils.py new file mode 100644 index 00000000000..40912e9b352 --- /dev/null +++ b/Lib/profiling/sampling/_css_utils.py @@ -0,0 +1,22 @@ +import importlib.resources + + +def get_combined_css(component: str) -> str: + template_dir = importlib.resources.files(__package__) + + base_css = (template_dir / "_shared_assets" / "base.css").read_text(encoding="utf-8") + + if component == "flamegraph": + component_css = ( + template_dir / "_flamegraph_assets" / "flamegraph.css" + ).read_text(encoding="utf-8") + elif component == "heatmap": + component_css = (template_dir / "_heatmap_assets" / "heatmap.css").read_text( + encoding="utf-8" + ) + else: + raise ValueError( + f"Unknown component: {component}. Expected 'flamegraph' or 'heatmap'." 
+ ) + + return f"{base_css}\n\n{component_css}" diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css new file mode 100644 index 00000000000..18d2279da9b --- /dev/null +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css @@ -0,0 +1,901 @@ +/* ========================================================================== + Flamegraph Viewer - Component-Specific CSS + + DEPENDENCY: Requires _shared_assets/base.css to be loaded first + This file extends the shared foundation with flamegraph-specific styles. + ========================================================================== */ + +/* -------------------------------------------------------------------------- + Layout Overrides (Flamegraph-specific) + -------------------------------------------------------------------------- */ + +html, body { + height: 100%; + overflow: hidden; +} + +.app-layout { + height: 100vh; +} + +.main-content { + display: flex; + flex: 1; + min-height: 0; +} + +/* -------------------------------------------------------------------------- + Search Input (Flamegraph-specific) + -------------------------------------------------------------------------- */ + +.search-wrapper { + flex: 1; + max-width: 360px; + position: relative; +} + +.search-input { + width: 100%; + padding: 8px 36px 8px 14px; + font-family: var(--font-sans); + font-size: 13px; + color: #2e3338; + background: rgba(255, 255, 255, 0.95); + border: 2px solid rgba(255, 255, 255, 0.3); + border-radius: 20px; + outline: none; + transition: all var(--transition-fast); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); +} + +.search-input::placeholder { + color: #6c757d; +} + +.search-input:focus { + border-color: rgba(255, 255, 255, 0.8); + background: white; + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.15); +} + +/* Dark theme search input */ +[data-theme="dark"] .search-input { + color: #e6edf3; + background: rgba(33, 38, 45, 0.95); + border: 2px solid rgba(88, 166, 255, 0.3); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3); +} + +[data-theme="dark"] .search-input::placeholder { + color: #8b949e; +} + +[data-theme="dark"] .search-input:focus { + border-color: rgba(88, 166, 255, 0.6); + background: rgba(33, 38, 45, 1); + box-shadow: 0 4px 16px rgba(88, 166, 255, 0.2); +} + +.search-input.has-matches { + border-color: rgba(40, 167, 69, 0.8); + box-shadow: 0 4px 16px rgba(40, 167, 69, 0.2); +} + +.search-input.no-matches { + border-color: rgba(220, 53, 69, 0.8); + box-shadow: 0 4px 16px rgba(220, 53, 69, 0.2); +} + +.search-clear { + position: absolute; + right: 10px; + top: 50%; + transform: translateY(-50%); + width: 20px; + height: 20px; + padding: 0; + display: none; + align-items: center; + justify-content: center; + font-size: 14px; + line-height: 1; + color: #6c757d; + background: transparent; + border: none; + border-radius: 50%; + cursor: pointer; + transition: color var(--transition-fast); +} + +.search-clear:hover { + color: #2e3338; +} + +[data-theme="dark"] .search-clear { + color: #8b949e; +} + +[data-theme="dark"] .search-clear:hover { + color: #e6edf3; +} + +.search-wrapper.has-value .search-clear { + display: flex; +} + +/* -------------------------------------------------------------------------- + Sidebar + -------------------------------------------------------------------------- */ + +.sidebar { + width: var(--sidebar-width); + background: var(--bg-secondary); + border-right: 1px solid var(--border); + display: flex; + flex-direction: column; + flex-shrink: 0; + 
overflow: hidden; + position: relative; +} + +.sidebar.collapsed { + width: var(--sidebar-collapsed) !important; + transition: width var(--transition-normal); +} + +.sidebar-toggle { + position: absolute; + top: 12px; + right: 10px; + width: 26px; + height: 26px; + display: flex; + align-items: center; + justify-content: center; + color: var(--text-muted); + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 6px; + cursor: pointer; + transition: all var(--transition-fast); + z-index: 10; +} + +.sidebar-toggle svg { + transition: transform var(--transition-fast); +} + +.sidebar-toggle:hover { + color: var(--accent); + border-color: var(--accent); + background: var(--accent-glow); +} + +.sidebar.collapsed .sidebar-toggle { + right: 9px; +} + +.sidebar.collapsed .sidebar-toggle svg { + transform: rotate(180deg); +} + +.sidebar-content { + flex: 1; + overflow-y: auto; + padding: 44px 14px 14px; +} + +.sidebar.collapsed .sidebar-content { + display: none; +} + +.sidebar-resize-handle { + position: absolute; + top: 0; + right: 0; + width: 6px; + height: 100%; + cursor: col-resize; + background: transparent; + transition: background var(--transition-fast); + z-index: 11; +} + +.sidebar-resize-handle:hover, +.sidebar-resize-handle.resizing { + background: var(--python-gold); +} + +.sidebar-resize-handle::before { + content: ''; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 2px; + height: 40px; + background: var(--border); + border-radius: 1px; + opacity: 0; + transition: opacity var(--transition-fast); +} + +.sidebar-resize-handle:hover::before { + opacity: 1; +} + +.sidebar.collapsed .sidebar-resize-handle { + display: none; +} + +body.resizing-sidebar { + cursor: col-resize; + user-select: none; +} + +/* Sidebar Logo */ +.sidebar-logo { + display: flex; + justify-content: center; + margin-bottom: 16px; +} + +.sidebar-logo-img { + width: 90px; + height: 90px; + display: flex; + align-items: center; + justify-content: center; +} + +.sidebar-logo-img svg, +.sidebar-logo-img img { + width: 100%; + height: 100%; + object-fit: contain; +} + +/* Sidebar sections */ +.sidebar-section { + margin-bottom: 20px; +} + +.sidebar-section:last-child { + margin-bottom: 0; +} + +.section-title { + font-size: 10px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.8px; + color: var(--accent); + margin: 0; + flex: 1; +} + +/* Collapsible sections */ +.collapsible .section-header { + display: flex; + align-items: center; + width: 100%; + padding: 0 0 8px 0; + margin-bottom: 10px; + background: none; + border: none; + border-bottom: 2px solid var(--python-gold); + cursor: pointer; + transition: all var(--transition-fast); +} + +.collapsible .section-header:hover { + opacity: 0.8; +} + +.section-chevron { + color: var(--text-muted); + transition: transform var(--transition-fast); +} + +.collapsible.collapsed .section-chevron { + transform: rotate(-90deg); +} + +.section-content { + overflow: hidden; + transition: max-height var(--transition-normal), opacity var(--transition-normal); + max-height: 1000px; + opacity: 1; +} + +.collapsible.collapsed .section-content { + max-height: 0; + opacity: 0; + margin-bottom: -10px; +} + +/* -------------------------------------------------------------------------- + Profile Summary Cards + -------------------------------------------------------------------------- */ + +.summary-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 6px; +} + +.summary-card { + display: flex; 
+ align-items: center; + gap: 8px; + padding: 8px 10px; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + transition: all var(--transition-fast); + animation: slideUp 0.4s ease-out backwards; + animation-delay: calc(var(--i, 0) * 0.05s); + overflow: hidden; +} + +.summary-card:nth-child(1) { --i: 0; } +.summary-card:nth-child(2) { --i: 1; } +.summary-card:nth-child(3) { --i: 2; } +.summary-card:nth-child(4) { --i: 3; } + +.summary-card:hover { + border-color: var(--accent); + background: var(--accent-glow); +} + +.summary-icon { + font-size: 16px; + width: 28px; + height: 28px; + display: flex; + align-items: center; + justify-content: center; + background: var(--bg-tertiary); + border-radius: 6px; + flex-shrink: 0; +} + +.summary-data { + min-width: 0; + flex: 1; + overflow: hidden; +} + +.summary-value { + font-family: var(--font-mono); + font-size: 13px; + font-weight: 700; + color: var(--accent); + line-height: 1.2; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.summary-label { + font-size: 8px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.2px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +/* Efficiency Bar */ +.efficiency-section { + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid var(--border); +} + +.efficiency-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 5px; +} + +.efficiency-label { + font-size: 9px; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.2px; +} + +.efficiency-value { + font-family: var(--font-mono); + font-size: 11px; + font-weight: 700; + color: var(--accent); +} + +.efficiency-bar { + height: 6px; + background: var(--bg-tertiary); + border-radius: 3px; + overflow: hidden; +} + +.efficiency-fill { + height: 100%; + background: linear-gradient(90deg, #28a745 0%, #20c997 50%, #17a2b8 100%); + border-radius: 3px; + transition: width 0.6s ease-out; + position: relative; + overflow: hidden; +} + +.efficiency-fill::after { + content: ''; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 100%; + background: linear-gradient( + 90deg, + transparent 0%, + rgba(255, 255, 255, 0.4) 50%, + transparent 100% + ); + animation: shimmer 2s ease-in-out infinite; +} + +/* -------------------------------------------------------------------------- + Thread Stats Grid (in Sidebar) + -------------------------------------------------------------------------- */ + +.thread-stats-section { + display: block; +} + +.stats-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 8px; +} + +.stat-tile { + background: var(--bg-primary); + border-radius: 8px; + padding: 10px; + text-align: center; + border: 2px solid var(--border); + transition: all var(--transition-fast); + animation: fadeIn 0.4s ease-out backwards; + animation-delay: calc(var(--i, 0) * 0.05s); +} + +.stat-tile:nth-child(1) { --i: 0; } +.stat-tile:nth-child(2) { --i: 1; } +.stat-tile:nth-child(3) { --i: 2; } +.stat-tile:nth-child(4) { --i: 3; } + +.stat-tile:hover { + transform: translateY(-2px); + box-shadow: var(--shadow-sm); +} + +.stat-tile-value { + font-family: var(--font-mono); + font-size: 16px; + font-weight: 700; + color: var(--text-primary); + line-height: 1.2; +} + +.stat-tile-label { + font-size: 9px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.3px; + color: var(--text-muted); + margin-top: 
2px; +} + +/* Stat tile color variants */ +.stat-tile--green { --tile-color: 40, 167, 69; --tile-text: #28a745; } +.stat-tile--red { --tile-color: 220, 53, 69; --tile-text: #dc3545; } +.stat-tile--yellow { --tile-color: 255, 193, 7; --tile-text: #d39e00; } +.stat-tile--purple { --tile-color: 111, 66, 193; --tile-text: #6f42c1; } + +.stat-tile[class*="--"] { + border-color: rgba(var(--tile-color), 0.4); + background: linear-gradient(135deg, rgba(var(--tile-color), 0.08) 0%, var(--bg-primary) 100%); +} +.stat-tile[class*="--"] .stat-tile-value { color: var(--tile-text); } + +/* -------------------------------------------------------------------------- + Hotspot Cards + -------------------------------------------------------------------------- */ + +.hotspot { + display: flex; + align-items: flex-start; + gap: 10px; + padding: 10px; + margin-bottom: 8px; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + cursor: pointer; + transition: all var(--transition-fast); + opacity: 0; + transform: translateY(8px); + box-shadow: var(--shadow-sm); +} + +.hotspot.visible { + opacity: 1; + transform: translateY(0); +} + +.hotspot:hover { + border-color: var(--accent); + box-shadow: var(--shadow-md); + transform: translateY(-2px); +} + +.hotspot.active { + border-color: var(--python-gold); + background: var(--accent-glow); + box-shadow: 0 0 0 3px var(--accent-glow); +} + +.hotspot:last-child { + margin-bottom: 0; +} + +.hotspot-rank { + width: 26px; + height: 26px; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + font-weight: 700; + font-size: 12px; + flex-shrink: 0; + background: linear-gradient(135deg, var(--python-blue) 0%, var(--python-blue-light) 100%); + color: white; + box-shadow: 0 2px 4px rgba(55, 118, 171, 0.3); +} + +.hotspot-rank--1 { background: linear-gradient(135deg, #d4af37, #f4d03f); color: #5a4a00; } +.hotspot-rank--2 { background: linear-gradient(135deg, #a8a8a8, #c0c0c0); color: #4a4a4a; } +.hotspot-rank--3 { background: linear-gradient(135deg, #cd7f32, #e6a55a); color: #5a3d00; } + +.hotspot-info { + flex: 1; + min-width: 0; +} + +.hotspot-func { + font-family: var(--font-mono); + font-size: 11px; + font-weight: 600; + color: var(--text-primary); + line-height: 1.3; + word-break: break-word; + margin-bottom: 2px; +} + +.hotspot-file { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-muted); + margin-bottom: 3px; + word-break: break-all; +} + +.hotspot-stats { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-secondary); +} + +.hotspot-percent { + color: var(--accent); + font-weight: 600; +} + +/* -------------------------------------------------------------------------- + Legend + -------------------------------------------------------------------------- */ + +.legend-section { + margin-top: auto; + padding-top: 12px; +} + +.legend { + display: flex; + flex-direction: column; + gap: 4px; +} + +.legend-item { + display: flex; + align-items: center; + gap: 8px; + padding: 5px 8px; + background: var(--bg-primary); + border-radius: 4px; + border: 1px solid var(--border-subtle); + font-size: 11px; +} + +.legend-color { + width: 20px; + height: 10px; + border-radius: 2px; + flex-shrink: 0; + border: 1px solid rgba(0, 0, 0, 0.08); +} + +.legend-label { + color: var(--text-primary); + font-weight: 500; + flex: 1; +} + +.legend-range { + font-family: var(--font-mono); + font-size: 9px; + color: var(--text-muted); +} + +/* 
-------------------------------------------------------------------------- + Thread Filter + -------------------------------------------------------------------------- */ + +.filter-section { + padding-top: 12px; + border-top: 1px solid var(--border); +} + +.filter-label { + display: block; + font-size: 10px; + font-weight: 600; + color: var(--text-muted); + margin-bottom: 6px; +} + +.filter-select { + width: 100%; + padding: 7px 28px 7px 10px; + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-primary); + background: var(--bg-primary); + border: 2px solid var(--accent); + border-radius: 6px; + cursor: pointer; + outline: none; + appearance: none; + background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='%233776ab' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e"); + background-repeat: no-repeat; + background-position: right 6px center; + background-size: 14px; + transition: all var(--transition-fast); +} + +.filter-select:hover { + border-color: var(--accent-hover); + box-shadow: 0 2px 6px var(--accent-glow); +} + +.filter-select:focus { + border-color: var(--accent); + box-shadow: 0 0 0 3px var(--accent-glow); +} + +/* -------------------------------------------------------------------------- + Chart Area + -------------------------------------------------------------------------- */ + +.chart-area { + flex: 1; + min-width: 0; + overflow: hidden; + background: var(--bg-primary); + position: relative; +} + +#chart { + width: 100%; + height: 100%; + padding: 16px; + overflow: auto; +} + +/* D3 Flamegraph overrides */ +.d3-flame-graph rect { + stroke: rgba(55, 118, 171, 0.3); + stroke-width: 1px; + cursor: pointer; + transition: filter 0.1s ease; +} + +.d3-flame-graph rect:hover { + stroke: var(--python-blue); + stroke-width: 2px; + filter: brightness(1.08); +} + +.d3-flame-graph text { + font-family: var(--font-sans); + font-size: 12px; + font-weight: 500; + fill: var(--text-primary); + pointer-events: none; +} + +/* Search highlight */ +.d3-flame-graph rect.search-match { + stroke: #ff6b35 !important; + stroke-width: 2px !important; + stroke-dasharray: 4 2; +} + +.d3-flame-graph rect.search-dim { + opacity: 0.25; +} + +/* -------------------------------------------------------------------------- + Tooltip + -------------------------------------------------------------------------- */ + +.python-tooltip { + position: absolute; + z-index: 1000; + pointer-events: none; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + padding: 14px; + max-width: 480px; + box-shadow: var(--shadow-lg); + font-family: var(--font-sans); + font-size: 13px; + color: var(--text-primary); + word-wrap: break-word; + overflow-wrap: break-word; + line-height: 1.5; +} + +.tooltip-header { + margin-bottom: 10px; +} + +.tooltip-title { + font-size: 14px; + font-weight: 600; + color: var(--accent); + line-height: 1.3; + word-break: break-word; + margin-bottom: 4px; +} + +.tooltip-location { + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-secondary); + background: var(--bg-tertiary); + padding: 4px 8px; + border-radius: 4px; + word-break: break-all; +} + +.tooltip-stats { + display: grid; + grid-template-columns: auto 1fr; + gap: 4px 14px; + font-size: 12px; +} + +.tooltip-stat-label { + color: var(--text-secondary); + font-weight: 500; +} + +.tooltip-stat-value { + color: 
var(--text-primary); + font-weight: 600; +} + +.tooltip-stat-value.accent { + color: var(--accent); +} + +.tooltip-source { + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid var(--border); +} + +.tooltip-source-title { + font-size: 11px; + font-weight: 600; + color: var(--accent); + margin-bottom: 6px; +} + +.tooltip-source-code { + font-family: var(--font-mono); + font-size: 10px; + line-height: 1.5; + background: var(--bg-tertiary); + border: 1px solid var(--border); + border-radius: 6px; + padding: 8px; + max-height: 140px; + overflow-y: auto; + white-space: pre-wrap; + word-break: break-all; +} + +.tooltip-source-line { + color: var(--text-secondary); + padding: 1px 0; +} + +.tooltip-source-line.current { + color: var(--accent); + font-weight: 600; +} + +.tooltip-hint { + margin-top: 10px; + padding-top: 8px; + border-top: 1px solid var(--border); + font-size: 11px; + color: var(--text-muted); + text-align: center; +} + +/* -------------------------------------------------------------------------- + Responsive (Flamegraph-specific) + -------------------------------------------------------------------------- */ + +@media (max-width: 900px) { + .sidebar { + position: fixed; + left: 0; + top: var(--topbar-height); + bottom: var(--statusbar-height); + z-index: 100; + box-shadow: var(--shadow-lg); + } + + .sidebar.collapsed { + width: var(--sidebar-collapsed); + } + + .search-wrapper { + max-width: 220px; + } +} + +@media (max-width: 600px) { + .search-wrapper { + max-width: 160px; + } + + .brand-info { + display: none; + } + + .stats-grid { + grid-template-columns: 1fr; + } +} diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js new file mode 100644 index 00000000000..494d156a8dd --- /dev/null +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -0,0 +1,1012 @@ +const EMBEDDED_DATA = {{FLAMEGRAPH_DATA}}; + +// Global string table for resolving string indices +let stringTable = []; +let originalData = null; +let currentThreadFilter = 'all'; + +// Heat colors are now defined in CSS variables (--heat-1 through --heat-8) +// and automatically switch with theme changes - no JS color arrays needed! + +// ============================================================================ +// String Resolution +// ============================================================================ + +function resolveString(index) { + if (index === null || index === undefined) { + return null; + } + if (typeof index === 'number' && index >= 0 && index < stringTable.length) { + return stringTable[index]; + } + return String(index); +} + +function resolveStringIndices(node) { + if (!node) return node; + + const resolved = { ...node }; + + if (typeof resolved.name === 'number') { + resolved.name = resolveString(resolved.name); + } + if (typeof resolved.filename === 'number') { + resolved.filename = resolveString(resolved.filename); + } + if (typeof resolved.funcname === 'number') { + resolved.funcname = resolveString(resolved.funcname); + } + + if (Array.isArray(resolved.source)) { + resolved.source = resolved.source.map(index => + typeof index === 'number' ? 
resolveString(index) : index + ); + } + + if (Array.isArray(resolved.children)) { + resolved.children = resolved.children.map(child => resolveStringIndices(child)); + } + + return resolved; +} + +// ============================================================================ +// Theme & UI Controls +// ============================================================================ + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'light'; + const next = current === 'light' ? 'dark' : 'light'; + html.setAttribute('data-theme', next); + localStorage.setItem('flamegraph-theme', next); + + // Update theme button icon + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = next === 'dark' ? '☼' : '☾'; // sun or moon + } + + // Re-render flamegraph with new theme colors + if (window.flamegraphData && originalData) { + const tooltip = createPythonTooltip(originalData); + const chart = createFlamegraph(tooltip, originalData.value); + renderFlamegraph(chart, window.flamegraphData); + } +} + +function toggleSidebar() { + const sidebar = document.getElementById('sidebar'); + if (sidebar) { + const isCollapsing = !sidebar.classList.contains('collapsed'); + + if (isCollapsing) { + // Save current width before collapsing + const currentWidth = sidebar.offsetWidth; + sidebar.dataset.expandedWidth = currentWidth; + localStorage.setItem('flamegraph-sidebar-width', currentWidth); + } else { + // Restore width when expanding + const savedWidth = sidebar.dataset.expandedWidth || localStorage.getItem('flamegraph-sidebar-width'); + if (savedWidth) { + sidebar.style.width = savedWidth + 'px'; + } + } + + sidebar.classList.toggle('collapsed'); + localStorage.setItem('flamegraph-sidebar', sidebar.classList.contains('collapsed') ? 'collapsed' : 'expanded'); + + // Resize chart after sidebar animation + setTimeout(() => { + resizeChart(); + }, 300); + } +} + +function resizeChart() { + if (window.flamegraphChart && window.flamegraphData) { + const chartArea = document.querySelector('.chart-area'); + if (chartArea) { + window.flamegraphChart.width(chartArea.clientWidth - 32); + d3.select("#chart").datum(window.flamegraphData).call(window.flamegraphChart); + } + } +} + +function toggleSection(sectionId) { + const section = document.getElementById(sectionId); + if (section) { + section.classList.toggle('collapsed'); + // Save state + const collapsedSections = JSON.parse(localStorage.getItem('flamegraph-collapsed-sections') || '{}'); + collapsedSections[sectionId] = section.classList.contains('collapsed'); + localStorage.setItem('flamegraph-collapsed-sections', JSON.stringify(collapsedSections)); + } +} + +function restoreUIState() { + // Restore theme + const savedTheme = localStorage.getItem('flamegraph-theme'); + if (savedTheme) { + document.documentElement.setAttribute('data-theme', savedTheme); + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = savedTheme === 'dark' ? 
'☼' : '☾'; + } + } + + // Restore sidebar state + const savedSidebar = localStorage.getItem('flamegraph-sidebar'); + if (savedSidebar === 'collapsed') { + const sidebar = document.getElementById('sidebar'); + if (sidebar) sidebar.classList.add('collapsed'); + } + + // Restore sidebar width + const savedWidth = localStorage.getItem('flamegraph-sidebar-width'); + if (savedWidth) { + const sidebar = document.getElementById('sidebar'); + if (sidebar) { + sidebar.style.width = savedWidth + 'px'; + } + } + + // Restore collapsed sections + const collapsedSections = JSON.parse(localStorage.getItem('flamegraph-collapsed-sections') || '{}'); + for (const [sectionId, isCollapsed] of Object.entries(collapsedSections)) { + if (isCollapsed) { + const section = document.getElementById(sectionId); + if (section) section.classList.add('collapsed'); + } + } +} + +// ============================================================================ +// Status Bar +// ============================================================================ + +function updateStatusBar(nodeData, rootValue) { + const funcname = resolveString(nodeData.funcname) || resolveString(nodeData.name) || "--"; + const filename = resolveString(nodeData.filename) || ""; + const lineno = nodeData.lineno; + const timeMs = (nodeData.value / 1000).toFixed(2); + const percent = rootValue > 0 ? ((nodeData.value / rootValue) * 100).toFixed(1) : "0.0"; + + const locationEl = document.getElementById('status-location'); + const funcItem = document.getElementById('status-func-item'); + const timeItem = document.getElementById('status-time-item'); + const percentItem = document.getElementById('status-percent-item'); + + if (locationEl) locationEl.style.display = filename && filename !== "~" ? 'flex' : 'none'; + if (funcItem) funcItem.style.display = 'flex'; + if (timeItem) timeItem.style.display = 'flex'; + if (percentItem) percentItem.style.display = 'flex'; + + const fileEl = document.getElementById('status-file'); + if (fileEl && filename && filename !== "~") { + const basename = filename.split('/').pop(); + fileEl.textContent = lineno ? `${basename}:${lineno}` : basename; + } + + const funcEl = document.getElementById('status-func'); + if (funcEl) funcEl.textContent = funcname.length > 40 ? funcname.substring(0, 37) + '...' : funcname; + + const timeEl = document.getElementById('status-time'); + if (timeEl) timeEl.textContent = `${timeMs} ms`; + + const percentEl = document.getElementById('status-percent'); + if (percentEl) percentEl.textContent = `${percent}%`; +} + +function clearStatusBar() { + const ids = ['status-location', 'status-func-item', 'status-time-item', 'status-percent-item']; + ids.forEach(id => { + const el = document.getElementById(id); + if (el) el.style.display = 'none'; + }); +} + +// ============================================================================ +// Tooltip +// ============================================================================ + +function createPythonTooltip(data) { + const pythonTooltip = flamegraph.tooltip.defaultFlamegraphTooltip(); + + pythonTooltip.show = function (d, element) { + if (!this._tooltip) { + this._tooltip = d3.select("body") + .append("div") + .attr("class", "python-tooltip") + .style("opacity", 0); + } + + const timeMs = (d.data.value / 1000).toFixed(2); + const percentage = ((d.data.value / data.value) * 100).toFixed(2); + const calls = d.data.calls || 0; + const childCount = d.children ? 
d.children.length : 0;
+        const source = d.data.source;
+
+        const funcname = resolveString(d.data.funcname) || resolveString(d.data.name);
+        const filename = resolveString(d.data.filename) || "";
+        const isSpecialFrame = filename === "~";
+
+        // Build source section
+        let sourceSection = "";
+        if (source && Array.isArray(source) && source.length > 0) {
+            const sourceLines = source
+                .map((line) => {
+                    const isCurrent = line.startsWith("→");
+                    const escaped = line.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+                    return `<div class="tooltip-source-line${isCurrent ? ' current' : ''}">${escaped}</div>`;
+                })
+                .join("");
+
+            sourceSection = `
+                <div class="tooltip-source">
+                    <div class="tooltip-source-title">Source Code:</div>
+                    <div class="tooltip-source-code">${sourceLines}</div>
+                </div>`;
+        }
+
+        const fileLocationHTML = isSpecialFrame ? "" : `
+            <div class="tooltip-location">${filename}${d.data.lineno ? ":" + d.data.lineno : ""}</div>`;
+
+        const tooltipHTML = `
+            <div class="tooltip-header">
+                <div class="tooltip-title">${funcname}</div>
+                ${fileLocationHTML}
+            </div>
+            <div class="tooltip-stats">
+                <span class="tooltip-stat-label">Execution Time:</span>
+                <span class="tooltip-stat-value accent">${timeMs} ms</span>
+                <span class="tooltip-stat-label">Percentage:</span>
+                <span class="tooltip-stat-value">${percentage}%</span>
+                ${calls > 0 ? `
+                <span class="tooltip-stat-label">Function Calls:</span>
+                <span class="tooltip-stat-value">${calls.toLocaleString()}</span>
+                ` : ''}
+                ${childCount > 0 ? `
+                <span class="tooltip-stat-label">Child Functions:</span>
+                <span class="tooltip-stat-value">${childCount}</span>
+                ` : ''}
+            </div>
+            ${sourceSection}
+            <div class="tooltip-hint">
+                ${childCount > 0 ? "Click to zoom into this function" : "Leaf function - no children"}
+            </div>
+ `; + + // Position tooltip + const event = d3.event || window.event; + const mouseX = event.pageX || event.clientX; + const mouseY = event.pageY || event.clientY; + const padding = 12; + + this._tooltip.html(tooltipHTML); + + // Measure tooltip + const node = this._tooltip.style("display", "block").style("opacity", 0).node(); + const tooltipWidth = node.offsetWidth || 320; + const tooltipHeight = node.offsetHeight || 200; + + // Calculate position + let left = mouseX + padding; + let top = mouseY + padding; + + if (left + tooltipWidth > window.innerWidth) { + left = mouseX - tooltipWidth - padding; + if (left < 0) left = padding; + } + + if (top + tooltipHeight > window.innerHeight) { + top = mouseY - tooltipHeight - padding; + if (top < 0) top = padding; + } + + this._tooltip + .style("left", left + "px") + .style("top", top + "px") + .transition() + .duration(150) + .style("opacity", 1); + + // Update status bar + updateStatusBar(d.data, data.value); + }; + + pythonTooltip.hide = function () { + if (this._tooltip) { + this._tooltip.transition().duration(150).style("opacity", 0); + } + clearStatusBar(); + }; + + return pythonTooltip; +} + +// ============================================================================ +// Flamegraph Creation +// ============================================================================ + +function ensureLibraryLoaded() { + if (typeof flamegraph === "undefined") { + console.error("d3-flame-graph library not loaded"); + document.getElementById("chart").innerHTML = + '
<div>Error: d3-flame-graph library failed to load</div>
'; + throw new Error("d3-flame-graph library failed to load"); + } +} + +const HEAT_THRESHOLDS = [ + [0.6, 8], + [0.35, 7], + [0.18, 6], + [0.12, 5], + [0.06, 4], + [0.03, 3], + [0.01, 2], +]; + +function getHeatLevel(percentage) { + for (const [threshold, level] of HEAT_THRESHOLDS) { + if (percentage >= threshold) return level; + } + return 1; +} + +function getHeatColors() { + const style = getComputedStyle(document.documentElement); + const colors = {}; + for (let i = 1; i <= 8; i++) { + colors[i] = style.getPropertyValue(`--heat-${i}`).trim(); + } + return colors; +} + +function createFlamegraph(tooltip, rootValue) { + const chartArea = document.querySelector('.chart-area'); + const width = chartArea ? chartArea.clientWidth - 32 : window.innerWidth - 320; + const heatColors = getHeatColors(); + + let chart = flamegraph() + .width(width) + .cellHeight(20) + .transitionDuration(300) + .minFrameSize(1) + .tooltip(tooltip) + .inverted(true) + .setColorMapper(function (d) { + const percentage = d.data.value / rootValue; + const level = getHeatLevel(percentage); + return heatColors[level]; + }); + + return chart; +} + +function renderFlamegraph(chart, data) { + d3.select("#chart").datum(data).call(chart); + window.flamegraphChart = chart; + window.flamegraphData = data; + populateStats(data); +} + +// ============================================================================ +// Search +// ============================================================================ + +function updateSearchHighlight(searchTerm, searchInput) { + d3.selectAll("#chart rect") + .classed("search-match", false) + .classed("search-dim", false); + + // Clear active state from all hotspots + document.querySelectorAll('.hotspot').forEach(h => h.classList.remove('active')); + + if (searchTerm && searchTerm.length > 0) { + let matchCount = 0; + + d3.selectAll("#chart rect").each(function (d) { + if (d && d.data) { + const name = resolveString(d.data.name) || ""; + const funcname = resolveString(d.data.funcname) || ""; + const filename = resolveString(d.data.filename) || ""; + const lineno = d.data.lineno; + const term = searchTerm.toLowerCase(); + + // Check if search term looks like file:line pattern + const fileLineMatch = term.match(/^(.+):(\d+)$/); + let matches = false; + + if (fileLineMatch) { + // Exact file:line matching + const searchFile = fileLineMatch[1]; + const searchLine = parseInt(fileLineMatch[2], 10); + const basename = filename.split('/').pop().toLowerCase(); + matches = basename.includes(searchFile) && lineno === searchLine; + } else { + // Regular substring search + matches = + name.toLowerCase().includes(term) || + funcname.toLowerCase().includes(term) || + filename.toLowerCase().includes(term); + } + + if (matches) { + matchCount++; + d3.select(this).classed("search-match", true); + } else { + d3.select(this).classed("search-dim", true); + } + } + }); + + if (searchInput) { + searchInput.classList.remove("has-matches", "no-matches"); + searchInput.classList.add(matchCount > 0 ? 
"has-matches" : "no-matches"); + } + + // Mark matching hotspot as active + document.querySelectorAll('.hotspot').forEach(h => { + if (h.dataset.searchterm && h.dataset.searchterm.toLowerCase() === searchTerm.toLowerCase()) { + h.classList.add('active'); + } + }); + } else if (searchInput) { + searchInput.classList.remove("has-matches", "no-matches"); + } +} + +function searchForHotspot(funcname) { + const searchInput = document.getElementById('search-input'); + const searchWrapper = document.querySelector('.search-wrapper'); + if (searchInput) { + // Toggle: if already searching for this term, clear it + if (searchInput.value.trim() === funcname) { + clearSearch(); + } else { + searchInput.value = funcname; + if (searchWrapper) { + searchWrapper.classList.add('has-value'); + } + performSearch(); + } + } +} + +function initSearchHandlers() { + const searchInput = document.getElementById("search-input"); + const searchWrapper = document.querySelector(".search-wrapper"); + if (!searchInput) return; + + let searchTimeout; + function performSearch() { + const term = searchInput.value.trim(); + updateSearchHighlight(term, searchInput); + // Toggle has-value class for clear button visibility + if (searchWrapper) { + searchWrapper.classList.toggle("has-value", term.length > 0); + } + } + + searchInput.addEventListener("input", function () { + clearTimeout(searchTimeout); + searchTimeout = setTimeout(performSearch, 150); + }); + + window.performSearch = performSearch; +} + +function clearSearch() { + const searchInput = document.getElementById("search-input"); + const searchWrapper = document.querySelector(".search-wrapper"); + if (searchInput) { + searchInput.value = ""; + searchInput.classList.remove("has-matches", "no-matches"); + if (searchWrapper) { + searchWrapper.classList.remove("has-value"); + } + // Clear highlights + d3.selectAll("#chart rect") + .classed("search-match", false) + .classed("search-dim", false); + // Clear active hotspot + document.querySelectorAll('.hotspot').forEach(h => h.classList.remove('active')); + } +} + +// ============================================================================ +// Resize Handler +// ============================================================================ + +function handleResize() { + let resizeTimeout; + window.addEventListener("resize", function () { + clearTimeout(resizeTimeout); + resizeTimeout = setTimeout(resizeChart, 100); + }); +} + +function initSidebarResize() { + const sidebar = document.getElementById('sidebar'); + const resizeHandle = document.getElementById('sidebar-resize-handle'); + if (!sidebar || !resizeHandle) return; + + let isResizing = false; + let startX = 0; + let startWidth = 0; + const minWidth = 200; + const maxWidth = 600; + + resizeHandle.addEventListener('mousedown', function(e) { + isResizing = true; + startX = e.clientX; + startWidth = sidebar.offsetWidth; + resizeHandle.classList.add('resizing'); + document.body.classList.add('resizing-sidebar'); + e.preventDefault(); + }); + + document.addEventListener('mousemove', function(e) { + if (!isResizing) return; + + const deltaX = e.clientX - startX; + const newWidth = Math.min(Math.max(startWidth + deltaX, minWidth), maxWidth); + sidebar.style.width = newWidth + 'px'; + e.preventDefault(); + }); + + document.addEventListener('mouseup', function() { + if (isResizing) { + isResizing = false; + resizeHandle.classList.remove('resizing'); + document.body.classList.remove('resizing-sidebar'); + + // Save the new width + const width = sidebar.offsetWidth; + 
localStorage.setItem('flamegraph-sidebar-width', width); + + // Resize chart after sidebar resize + setTimeout(() => { + resizeChart(); + }, 10); + } + }); +} + +// ============================================================================ +// Thread Stats +// ============================================================================ + +// Mode constants (must match constants.py) +const PROFILING_MODE_WALL = 0; +const PROFILING_MODE_CPU = 1; +const PROFILING_MODE_GIL = 2; +const PROFILING_MODE_ALL = 3; + +function populateThreadStats(data, selectedThreadId = null) { + const stats = data?.stats; + if (!stats || !stats.thread_stats) { + return; + } + + const mode = stats.mode !== undefined ? stats.mode : PROFILING_MODE_WALL; + let threadStats; + + if (selectedThreadId !== null && stats.per_thread_stats && stats.per_thread_stats[selectedThreadId]) { + threadStats = stats.per_thread_stats[selectedThreadId]; + } else { + threadStats = stats.thread_stats; + } + + if (!threadStats || typeof threadStats.total !== 'number' || threadStats.total <= 0) { + return; + } + + const section = document.getElementById('thread-stats-bar'); + if (!section) { + return; + } + + section.style.display = 'block'; + + const gilHeldStat = document.getElementById('gil-held-stat'); + const gilReleasedStat = document.getElementById('gil-released-stat'); + const gilWaitingStat = document.getElementById('gil-waiting-stat'); + + if (mode === PROFILING_MODE_GIL) { + // In GIL mode, hide GIL-related stats + if (gilHeldStat) gilHeldStat.style.display = 'none'; + if (gilReleasedStat) gilReleasedStat.style.display = 'none'; + if (gilWaitingStat) gilWaitingStat.style.display = 'none'; + } else { + // Show all stats + if (gilHeldStat) gilHeldStat.style.display = 'block'; + if (gilReleasedStat) gilReleasedStat.style.display = 'block'; + if (gilWaitingStat) gilWaitingStat.style.display = 'block'; + + const gilHeldPctElem = document.getElementById('gil-held-pct'); + if (gilHeldPctElem) gilHeldPctElem.textContent = `${(threadStats.has_gil_pct || 0).toFixed(1)}%`; + + const gilReleasedPctElem = document.getElementById('gil-released-pct'); + // GIL Released = not holding GIL and not waiting for it + const gilReleasedPct = Math.max(0, 100 - (threadStats.has_gil_pct || 0) - (threadStats.gil_requested_pct || 0)); + if (gilReleasedPctElem) gilReleasedPctElem.textContent = `${gilReleasedPct.toFixed(1)}%`; + + const gilWaitingPctElem = document.getElementById('gil-waiting-pct'); + if (gilWaitingPctElem) gilWaitingPctElem.textContent = `${(threadStats.gil_requested_pct || 0).toFixed(1)}%`; + } + + const gcPctElem = document.getElementById('gc-pct'); + if (gcPctElem) gcPctElem.textContent = `${(threadStats.gc_pct || 0).toFixed(1)}%`; +} + +// ============================================================================ +// Profile Summary Stats +// ============================================================================ + +function formatNumber(num) { + if (num >= 1000000) return (num / 1000000).toFixed(1) + 'M'; + if (num >= 1000) return (num / 1000).toFixed(1) + 'K'; + return num.toLocaleString(); +} + +function formatDuration(seconds) { + if (seconds >= 3600) { + const h = Math.floor(seconds / 3600); + const m = Math.floor((seconds % 3600) / 60); + return `${h}h ${m}m`; + } + if (seconds >= 60) { + const m = Math.floor(seconds / 60); + const s = Math.floor(seconds % 60); + return `${m}m ${s}s`; + } + return seconds.toFixed(2) + 's'; +} + +function populateProfileSummary(data) { + const stats = data.stats || {}; + const totalSamples = 
stats.total_samples || data.value || 0; + const duration = stats.duration_sec || 0; + const sampleRate = stats.sample_rate || (duration > 0 ? totalSamples / duration : 0); + const errorRate = stats.error_rate || 0; + const missedSamples = stats.missed_samples || 0; + + const samplesEl = document.getElementById('stat-total-samples'); + if (samplesEl) samplesEl.textContent = formatNumber(totalSamples); + + const durationEl = document.getElementById('stat-duration'); + if (durationEl) durationEl.textContent = duration > 0 ? formatDuration(duration) : '--'; + + const rateEl = document.getElementById('stat-sample-rate'); + if (rateEl) rateEl.textContent = sampleRate > 0 ? formatNumber(Math.round(sampleRate)) : '--'; + + // Count unique functions + let functionCount = 0; + function countFunctions(node) { + if (!node) return; + functionCount++; + if (node.children) node.children.forEach(countFunctions); + } + countFunctions(data); + + const functionsEl = document.getElementById('stat-functions'); + if (functionsEl) functionsEl.textContent = formatNumber(functionCount); + + // Efficiency bar + if (errorRate !== undefined && errorRate !== null) { + const efficiency = Math.max(0, Math.min(100, (100 - errorRate))); + + const efficiencySection = document.getElementById('efficiency-section'); + if (efficiencySection) efficiencySection.style.display = 'block'; + + const efficiencyValue = document.getElementById('stat-efficiency'); + if (efficiencyValue) efficiencyValue.textContent = efficiency.toFixed(1) + '%'; + + const efficiencyFill = document.getElementById('efficiency-fill'); + if (efficiencyFill) efficiencyFill.style.width = efficiency + '%'; + } + // Missed samples bar (rendered in the same efficiency section) + if (missedSamples !== undefined && missedSamples !== null) { + const missedPct = Math.max(0, missedSamples); + + const efficiencySection = document.getElementById('efficiency-section'); + if (efficiencySection) efficiencySection.style.display = 'block'; + + const missedValueEl = document.getElementById('stat-missed-samples'); + if (missedValueEl) missedValueEl.textContent = missedPct.toFixed(1) + '%'; + + const missedFillEl = document.getElementById('missed-samples-fill'); + if (missedFillEl) missedFillEl.style.width = missedPct + '%'; + } +} + +// ============================================================================ +// Hotspot Stats +// ============================================================================ + +function populateStats(data) { + const totalSamples = data.value || 0; + + // Populate profile summary + populateProfileSummary(data); + + // Populate thread statistics if available + populateThreadStats(data); + + const functionMap = new Map(); + + function collectFunctions(node) { + if (!node) return; + + let filename = resolveString(node.filename); + let funcname = resolveString(node.funcname); + + if (!filename || !funcname) { + const nameStr = resolveString(node.name); + if (nameStr?.includes('(')) { + const match = nameStr.match(/^(.+?)\s*\((.+?):(\d+)\)$/); + if (match) { + funcname = funcname || match[1]; + filename = filename || match[2]; + } + } + } + + filename = filename || 'unknown'; + funcname = funcname || 'unknown'; + + if (filename !== 'unknown' && funcname !== 'unknown' && node.value > 0) { + let childrenValue = 0; + if (node.children) { + childrenValue = node.children.reduce((sum, child) => sum + child.value, 0); + } + const directSamples = Math.max(0, node.value - childrenValue); + + const funcKey = 
`${filename}:${node.lineno || '?'}:${funcname}`; + + if (functionMap.has(funcKey)) { + const existing = functionMap.get(funcKey); + existing.directSamples += directSamples; + existing.directPercent = (existing.directSamples / totalSamples) * 100; + if (directSamples > existing.maxSingleSamples) { + existing.filename = filename; + existing.lineno = node.lineno || '?'; + existing.maxSingleSamples = directSamples; + } + } else { + functionMap.set(funcKey, { + filename: filename, + lineno: node.lineno || '?', + funcname: funcname, + directSamples, + directPercent: (directSamples / totalSamples) * 100, + maxSingleSamples: directSamples + }); + } + } + + if (node.children) { + node.children.forEach(child => collectFunctions(child)); + } + } + + collectFunctions(data); + + const hotSpots = Array.from(functionMap.values()) + .filter(f => f.directPercent > 0.5) + .sort((a, b) => b.directPercent - a.directPercent) + .slice(0, 3); + + // Populate and animate hotspot cards + for (let i = 0; i < 3; i++) { + const num = i + 1; + const card = document.getElementById(`hotspot-${num}`); + const funcEl = document.getElementById(`hotspot-func-${num}`); + const fileEl = document.getElementById(`hotspot-file-${num}`); + const percentEl = document.getElementById(`hotspot-percent-${num}`); + const samplesEl = document.getElementById(`hotspot-samples-${num}`); + + if (i < hotSpots.length && hotSpots[i]) { + const h = hotSpots[i]; + const filename = h.filename || 'unknown'; + const lineno = h.lineno ?? '?'; + const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?'); + + let funcDisplay = h.funcname || 'unknown'; + if (funcDisplay.length > 28) funcDisplay = funcDisplay.substring(0, 25) + '...'; + + if (funcEl) funcEl.textContent = funcDisplay; + if (fileEl) { + if (isSpecialFrame) { + fileEl.textContent = '--'; + } else { + const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown'; + fileEl.textContent = `${basename}:${lineno}`; + } + } + if (percentEl) percentEl.textContent = `${h.directPercent.toFixed(1)}%`; + if (samplesEl) samplesEl.textContent = ` (${h.directSamples.toLocaleString()})`; + } else { + if (funcEl) funcEl.textContent = '--'; + if (fileEl) fileEl.textContent = '--'; + if (percentEl) percentEl.textContent = '--'; + if (samplesEl) samplesEl.textContent = ''; + } + + // Add click handler and animate entrance + if (card) { + if (i < hotSpots.length && hotSpots[i]) { + const h = hotSpots[i]; + const basename = h.filename !== 'unknown' ? h.filename.split('/').pop() : ''; + const searchTerm = basename && h.lineno !== '?' ? 
`${basename}:${h.lineno}` : h.funcname; + card.dataset.searchterm = searchTerm; + card.onclick = () => searchForHotspot(searchTerm); + card.style.cursor = 'pointer'; + } else { + card.onclick = null; + delete card.dataset.searchterm; + card.style.cursor = 'default'; + } + + setTimeout(() => { + card.classList.add('visible'); + }, 100 + i * 80); + } + } +} + +// ============================================================================ +// Thread Filter +// ============================================================================ + +function initThreadFilter(data) { + const threadFilter = document.getElementById('thread-filter'); + const threadSection = document.getElementById('thread-section'); + + if (!threadFilter || !data.threads) return; + + threadFilter.innerHTML = ''; + + const threads = data.threads || []; + threads.forEach(threadId => { + const option = document.createElement('option'); + option.value = threadId; + option.textContent = `Thread ${threadId}`; + threadFilter.appendChild(option); + }); + + if (threads.length > 1 && threadSection) { + threadSection.style.display = 'block'; + } +} + +function filterByThread() { + const threadFilter = document.getElementById('thread-filter'); + if (!threadFilter || !originalData) return; + + const selectedThread = threadFilter.value; + currentThreadFilter = selectedThread; + + let filteredData; + let selectedThreadId = null; + + if (selectedThread === 'all') { + filteredData = originalData; + } else { + selectedThreadId = parseInt(selectedThread, 10); + filteredData = filterDataByThread(originalData, selectedThreadId); + + if (filteredData.strings) { + stringTable = filteredData.strings; + filteredData = resolveStringIndices(filteredData); + } + } + + const tooltip = createPythonTooltip(filteredData); + const chart = createFlamegraph(tooltip, filteredData.value); + renderFlamegraph(chart, filteredData); + + populateThreadStats(originalData, selectedThreadId); +} + +function filterDataByThread(data, threadId) { + function filterNode(node) { + if (!node.threads || !node.threads.includes(threadId)) { + return null; + } + + const filteredNode = { ...node, children: [] }; + + if (node.children && Array.isArray(node.children)) { + filteredNode.children = node.children + .map(child => filterNode(child)) + .filter(child => child !== null); + } + + return filteredNode; + } + + function recalculateValue(node) { + if (!node.children || node.children.length === 0) { + return node.value || 0; + } + const childrenValue = node.children.reduce((sum, child) => sum + recalculateValue(child), 0); + node.value = Math.max(node.value || 0, childrenValue); + return node.value; + } + + const filteredRoot = { ...data, children: [] }; + + if (data.children && Array.isArray(data.children)) { + filteredRoot.children = data.children + .map(child => filterNode(child)) + .filter(child => child !== null); + } + + recalculateValue(filteredRoot); + return filteredRoot; +} + +// ============================================================================ +// Control Functions +// ============================================================================ + +function resetZoom() { + if (window.flamegraphChart) { + window.flamegraphChart.resetZoom(); + } +} + +function exportSVG() { + const svgElement = document.querySelector("#chart svg"); + if (!svgElement) { + console.warn("Cannot export: No flamegraph SVG found"); + return; + } + const serializer = new XMLSerializer(); + const svgString = serializer.serializeToString(svgElement); + const blob = new 
Blob([svgString], { type: "image/svg+xml" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = "python-performance-flamegraph.svg"; + a.click(); + URL.revokeObjectURL(url); +} + +// ============================================================================ +// Initialization +// ============================================================================ + +function initFlamegraph() { + ensureLibraryLoaded(); + restoreUIState(); + + let processedData = EMBEDDED_DATA; + if (EMBEDDED_DATA.strings) { + stringTable = EMBEDDED_DATA.strings; + processedData = resolveStringIndices(EMBEDDED_DATA); + } + + originalData = processedData; + initThreadFilter(processedData); + + const tooltip = createPythonTooltip(processedData); + const chart = createFlamegraph(tooltip, processedData.value); + renderFlamegraph(chart, processedData); + initSearchHandlers(); + initSidebarResize(); + handleResize(); +} + +if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", initFlamegraph); +} else { + initFlamegraph(); +} diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html new file mode 100644 index 00000000000..82102c229e7 --- /dev/null +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html @@ -0,0 +1,309 @@ + + + + + + Tachyon Profiler - Flamegraph + + + + + + + +
+ +
+
+ Tachyon + + Profiler +
+
+ + +
+
+ + + +
+
+ + +
+ + + + +
+
+
+
+ + +
+ + + + +
+
+ + + + diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css new file mode 100644 index 00000000000..ada6d2f2ee1 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -0,0 +1,1162 @@ +/* ========================================================================== + Heatmap Viewer - Component-Specific CSS + + DEPENDENCY: Requires _shared_assets/base.css to be loaded first + This file extends the shared foundation with heatmap-specific styles. + ========================================================================== */ + +/* -------------------------------------------------------------------------- + Layout Overrides (Heatmap-specific) + -------------------------------------------------------------------------- */ + +.app-layout { + min-height: 100vh; +} + +/* Sticky top bar for heatmap views */ +.top-bar { + position: sticky; + top: 0; + z-index: 100; +} + +/* Back link in toolbar */ +.back-link { + color: white; + text-decoration: none; + padding: 6px 14px; + background: rgba(255, 255, 255, 0.12); + border: 1px solid rgba(255, 255, 255, 0.18); + border-radius: 6px; + font-size: 13px; + font-weight: 500; + transition: all var(--transition-fast); +} + +.back-link:hover { + background: rgba(255, 255, 255, 0.22); + border-color: rgba(255, 255, 255, 0.35); +} + +/* -------------------------------------------------------------------------- + Main Content Area + -------------------------------------------------------------------------- */ + +.main-content { + flex: 1; + padding: 24px 3%; + width: 100%; + max-width: 100%; +} + +/* -------------------------------------------------------------------------- + Stats Summary Cards - Enhanced with Icons & Animations + -------------------------------------------------------------------------- */ + +.stats-summary { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 12px; + margin-bottom: 24px; +} + +.stat-card { + display: flex; + align-items: center; + gap: 12px; + background: var(--bg-primary); + border: 2px solid var(--border); + border-radius: 10px; + padding: 14px 16px; + transition: all var(--transition-fast); + animation: slideUp 0.5s ease-out backwards; + animation-delay: calc(var(--i, 0) * 0.08s); + position: relative; + overflow: hidden; +} + +.stat-card::before { + content: ''; + position: absolute; + top: 0; + left: 0; + right: 0; + height: 3px; + background: linear-gradient(90deg, var(--python-blue), var(--python-gold)); + opacity: 0; + transition: opacity var(--transition-fast); +} + +.stat-card:nth-child(1) { --i: 0; --card-color: 55, 118, 171; } +.stat-card:nth-child(2) { --i: 1; --card-color: 40, 167, 69; } +.stat-card:nth-child(3) { --i: 2; --card-color: 255, 193, 7; } +.stat-card:nth-child(4) { --i: 3; --card-color: 111, 66, 193; } +.stat-card:nth-child(5) { --i: 4; --card-color: 220, 53, 69; } +.stat-card:nth-child(6) { --i: 5; --card-color: 23, 162, 184; } + +.stat-card:hover { + border-color: rgba(var(--card-color), 0.6); + background: linear-gradient(135deg, rgba(var(--card-color), 0.08) 0%, var(--bg-primary) 100%); + transform: translateY(-2px); + box-shadow: 0 4px 16px rgba(var(--card-color), 0.15); +} + +.stat-card:hover::before { + opacity: 1; +} + +.stat-icon { + width: 40px; + height: 40px; + display: flex; + align-items: center; + justify-content: center; + font-size: 18px; + background: linear-gradient(135deg, rgba(var(--card-color), 0.15) 0%, rgba(var(--card-color), 0.05) 100%); + border: 1px solid rgba(var(--card-color), 
0.2); + border-radius: 10px; + flex-shrink: 0; + transition: all var(--transition-fast); +} + +.stat-card:hover .stat-icon { + transform: scale(1.05) rotate(-2deg); + background: linear-gradient(135deg, rgba(var(--card-color), 0.25) 0%, rgba(var(--card-color), 0.1) 100%); +} + +.stat-data { + flex: 1; + min-width: 0; +} + +.stat-value { + font-family: var(--font-mono); + font-size: 1.35em; + font-weight: 800; + color: rgb(var(--card-color)); + display: block; + line-height: 1.1; + letter-spacing: -0.3px; +} + +.stat-label { + font-size: 10px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.3px; + margin-top: 2px; +} + +/* Sparkline decoration for stats */ +.stat-sparkline { + position: absolute; + bottom: 0; + left: 0; + right: 0; + height: 30px; + opacity: 0.1; + background: linear-gradient(180deg, + transparent 0%, + rgba(var(--card-color), 0.3) 100% + ); + pointer-events: none; +} + +/* -------------------------------------------------------------------------- + Rate Cards (Error Rate, Missed Samples) with Progress Bars + -------------------------------------------------------------------------- */ + +.rate-card { + display: flex; + flex-direction: column; + gap: 12px; + background: var(--bg-primary); + border: 2px solid var(--border); + border-radius: 12px; + padding: 18px 20px; + transition: all var(--transition-fast); + animation: slideUp 0.5s ease-out backwards; + position: relative; + overflow: hidden; +} + +.rate-card:nth-child(5) { animation-delay: 0.32s; --rate-color: 220, 53, 69; } +.rate-card:nth-child(6) { animation-delay: 0.40s; --rate-color: 255, 152, 0; } + +.rate-card:hover { + border-color: rgba(var(--rate-color), 0.5); + transform: translateY(-2px); + box-shadow: 0 6px 20px rgba(var(--rate-color), 0.15); +} + +.rate-header { + display: flex; + justify-content: space-between; + align-items: center; +} + +.rate-info { + display: flex; + align-items: center; + gap: 10px; +} + +.rate-icon { + width: 36px; + height: 36px; + display: flex; + align-items: center; + justify-content: center; + font-size: 18px; + background: linear-gradient(135deg, rgba(var(--rate-color), 0.15) 0%, rgba(var(--rate-color), 0.05) 100%); + border: 1px solid rgba(var(--rate-color), 0.2); + border-radius: 10px; + flex-shrink: 0; +} + +.rate-label { + font-size: 12px; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.3px; +} + +.rate-value { + font-family: var(--font-mono); + font-size: 1.4em; + font-weight: 800; + color: rgb(var(--rate-color)); +} + +.rate-bar { + height: 8px; + background: var(--bg-tertiary); + border-radius: 4px; + overflow: hidden; + position: relative; +} + +.rate-fill { + height: 100%; + border-radius: 4px; + transition: width 0.8s ease-out; + position: relative; + overflow: hidden; +} + +.rate-fill.error { + background: linear-gradient(90deg, #dc3545 0%, #ff6b6b 100%); +} + +.rate-fill.warning { + background: linear-gradient(90deg, #ff9800 0%, #ffc107 100%); +} + +.rate-fill.good { + background: linear-gradient(90deg, #28a745 0%, #20c997 100%); +} + +/* Shimmer animation on rate bars */ +.rate-fill::after { + content: ''; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 100%; + background: linear-gradient( + 90deg, + transparent 0%, + rgba(255, 255, 255, 0.4) 50%, + transparent 100% + ); + animation: shimmer 2.5s ease-in-out infinite; +} + +/* -------------------------------------------------------------------------- + Section Headers + 
-------------------------------------------------------------------------- */ + +.section-header { + display: flex; + align-items: center; + gap: 12px; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 2px solid var(--python-gold); +} + +.section-title { + font-size: 18px; + font-weight: 700; + color: var(--text-primary); + margin: 0; + flex: 1; +} + +/* -------------------------------------------------------------------------- + Filter Controls + -------------------------------------------------------------------------- */ + +.filter-controls { + display: flex; + gap: 8px; + flex-wrap: wrap; + align-items: center; + margin-bottom: 16px; +} + +.control-btn { + padding: 8px 16px; + background: var(--bg-secondary); + color: var(--text-primary); + border: 1px solid var(--border); + border-radius: 6px; + font-size: 13px; + font-weight: 500; + cursor: pointer; + transition: all var(--transition-fast); +} + +.control-btn:hover { + background: var(--accent); + color: white; + border-color: var(--accent); +} + +/* -------------------------------------------------------------------------- + Type Sections (stdlib, project, etc) + -------------------------------------------------------------------------- */ + +.type-section { + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + overflow: hidden; + margin-bottom: 12px; +} + +.type-header { + padding: 12px 16px; + background: var(--header-gradient); + color: white; + cursor: pointer; + display: flex; + align-items: center; + gap: 10px; + user-select: none; + transition: all var(--transition-fast); + font-weight: 600; +} + +.type-header:hover { + opacity: 0.95; +} + +.type-icon { + font-size: 12px; + transition: transform var(--transition-fast); + min-width: 12px; +} + +.type-title { + font-size: 14px; + flex: 1; +} + +.type-stats { + font-size: 12px; + opacity: 0.9; + background: rgba(255, 255, 255, 0.15); + padding: 4px 10px; + border-radius: 4px; + font-family: var(--font-mono); +} + +.type-content { + padding: 12px; +} + +/* -------------------------------------------------------------------------- + Folder Nodes (hierarchical structure) + -------------------------------------------------------------------------- */ + +.folder-node { + margin-bottom: 6px; +} + +.folder-header { + padding: 8px 12px; + background: var(--bg-secondary); + border: 1px solid var(--border); + border-radius: 6px; + cursor: pointer; + display: flex; + align-items: center; + gap: 8px; + user-select: none; + transition: all var(--transition-fast); +} + +.folder-header:hover { + background: var(--accent-glow); + border-color: var(--accent); +} + +.folder-icon { + font-size: 10px; + color: var(--accent); + transition: transform var(--transition-fast); + min-width: 12px; +} + +.folder-name { + flex: 1; + font-weight: 500; + color: var(--text-primary); + font-size: 13px; +} + +.folder-stats { + font-size: 11px; + color: var(--text-secondary); + background: var(--bg-tertiary); + padding: 2px 8px; + border-radius: 4px; + font-family: var(--font-mono); +} + +.folder-content { + padding-left: 20px; + margin-top: 6px; +} + +/* -------------------------------------------------------------------------- + File Items + -------------------------------------------------------------------------- */ + +.files-list { + display: flex; + flex-direction: column; + gap: 4px; + margin-top: 8px; +} + +.file-item { + display: flex; + align-items: center; + gap: 12px; + padding: 8px 12px; + background: var(--bg-primary); + border: 1px solid 
var(--border-subtle); + border-radius: 6px; + transition: all var(--transition-fast); +} + +.file-item:hover { + background: var(--bg-secondary); + border-color: var(--border); +} + +.file-item .file-link { + flex: 1; + min-width: 0; + font-size: 13px; +} + +.file-samples { + font-size: 12px; + color: var(--text-secondary); + font-weight: 600; + white-space: nowrap; + width: 130px; + flex-shrink: 0; + text-align: right; + font-family: var(--font-mono); +} + +.heatmap-bar-container { + width: 120px; + flex-shrink: 0; + display: flex; + align-items: center; +} + +.heatmap-bar { + flex-shrink: 0; + border-radius: 2px; +} + +/* Links */ +.file-link { + color: var(--accent); + text-decoration: none; + font-weight: 500; + transition: color var(--transition-fast); +} + +.file-link:hover { + color: var(--accent-hover); + text-decoration: underline; +} + +/* -------------------------------------------------------------------------- + Module Badges + -------------------------------------------------------------------------- */ + +.module-badge { + display: inline-block; + padding: 3px 8px; + border-radius: 4px; + font-size: 11px; + font-weight: 600; +} + +.badge-stdlib { + background: rgba(40, 167, 69, 0.15); + color: #28a745; +} + +.badge-site-packages { + background: rgba(0, 123, 255, 0.15); + color: #007bff; +} + +.badge-project { + background: rgba(255, 193, 7, 0.2); + color: #d39e00; +} + +.badge-other { + background: var(--bg-tertiary); + color: var(--text-secondary); +} + +[data-theme="dark"] .badge-stdlib { + background: rgba(40, 167, 69, 0.25); + color: #5dd879; +} + +[data-theme="dark"] .badge-site-packages { + background: rgba(88, 166, 255, 0.25); + color: #79b8ff; +} + +[data-theme="dark"] .badge-project { + background: rgba(255, 212, 59, 0.25); + color: #ffd43b; +} + +/* ========================================================================== + FILE VIEW STYLES (Code Display) + ========================================================================== */ + +.code-view { + font-family: var(--font-mono); + min-height: 100vh; +} + +/* Code Header (Top Bar for file view) */ +.code-header { + height: var(--topbar-height); + background: var(--header-gradient); + display: flex; + align-items: center; + padding: 0 16px; + gap: 16px; + box-shadow: 0 2px 10px rgba(55, 118, 171, 0.25); + border-bottom: 2px solid var(--python-gold); + position: sticky; + top: 0; + z-index: 100; +} + +.code-header-content { + display: flex; + align-items: center; + justify-content: space-between; + width: 94%; + max-width: 100%; + margin: 0 auto; +} + +.code-header h1 { + font-size: 14px; + font-weight: 600; + color: white; + margin: 0; + font-family: var(--font-mono); + display: flex; + align-items: center; + gap: 8px; +} + +/* File Stats Bar */ +.file-stats { + background: var(--bg-secondary); + padding: 16px 24px; + border-bottom: 1px solid var(--border); +} + +.file-stats .stats-grid { + width: 94%; + max-width: 100%; + margin: 0 auto; + display: grid; + grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); + gap: 12px; +} + +.stat-item { + background: var(--bg-primary); + padding: 12px; + border-radius: 8px; + box-shadow: var(--shadow-sm); + text-align: center; + border: 1px solid var(--border); + transition: all var(--transition-fast); +} + +.stat-item:hover { + transform: translateY(-2px); + box-shadow: var(--shadow-md); + border-color: var(--accent); +} + +.stat-item .stat-value { + font-size: 1.4em; + font-weight: 700; + color: var(--accent); +} + +.stat-item .stat-label { + color: 
var(--text-muted); + font-size: 10px; + margin-top: 2px; +} + +/* Legend */ +.legend { + background: var(--bg-secondary); + padding: 12px 24px; + border-bottom: 1px solid var(--border); +} + +.legend-content { + width: 94%; + max-width: 100%; + margin: 0 auto; + display: flex; + align-items: center; + gap: 20px; + flex-wrap: wrap; +} + +.legend-title { + font-weight: 600; + color: var(--text-primary); + font-size: 13px; + font-family: var(--font-sans); +} + +.legend-gradient { + flex: 1; + max-width: 300px; + height: 24px; + background: linear-gradient(90deg, + var(--bg-tertiary) 0%, + var(--heat-2) 25%, + var(--heat-4) 50%, + var(--heat-6) 75%, + var(--heat-8) 100% + ); + border-radius: 4px; + border: 1px solid var(--border); +} + +.legend-labels { + display: flex; + gap: 12px; + font-size: 11px; + color: var(--text-muted); + font-family: var(--font-sans); +} + +/* Toggle Switch Styles */ +.toggle-switch { + display: inline-flex; + align-items: center; + gap: 8px; + cursor: pointer; + user-select: none; + font-family: var(--font-sans); + transition: opacity var(--transition-fast); +} + +.toggle-switch:hover { + opacity: 0.85; +} + +.toggle-switch .toggle-label { + font-size: 11px; + font-weight: 500; + color: var(--text-muted); + min-width: 55px; + text-align: right; + transition: color var(--transition-fast); +} + +.toggle-switch .toggle-label:last-child { + text-align: left; +} + +.toggle-switch .toggle-label.active { + color: var(--text-primary); + font-weight: 600; +} + +.toggle-track { + position: relative; + width: 36px; + height: 20px; + background: var(--bg-tertiary); + border: 2px solid var(--border); + border-radius: 12px; + transition: all var(--transition-fast); + box-shadow: inset var(--shadow-sm); +} + +.toggle-track:hover { + border-color: var(--text-muted); +} + +.toggle-track.on { + background: var(--accent); + border-color: var(--accent); + box-shadow: 0 0 8px var(--accent-glow); +} + +.toggle-track::after { + content: ''; + position: absolute; + top: 1px; + left: 1px; + width: 14px; + height: 14px; + background: white; + border-radius: 50%; + box-shadow: var(--shadow-sm); + transition: all var(--transition-fast); +} + +.toggle-track.on::after { + transform: translateX(16px); + box-shadow: var(--shadow-md); +} + +/* Specific toggle overrides */ +#toggle-color-mode .toggle-track.on { + background: #8e44ad; + border-color: #8e44ad; + box-shadow: 0 0 8px rgba(142, 68, 173, 0.3); +} + +#toggle-cold .toggle-track.on { + background: #e67e22; + border-color: #e67e22; + box-shadow: 0 0 8px rgba(230, 126, 34, 0.3); +} + +/* Code Container */ +.code-container { + width: 94%; + max-width: 100%; + margin: 16px auto; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px 8px 8px 8px; + box-shadow: var(--shadow-sm); + /* Allow horizontal scroll for long lines, but don't clip sticky header */ +} + +/* Code Header Row */ +.code-header-row { + position: sticky; + top: var(--topbar-height); + z-index: 50; + display: flex; + background: var(--bg-secondary); + border-bottom: 2px solid var(--border); + font-weight: 700; + font-size: 11px; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.5px; + border-radius: 8px 8px 0 0; +} + +.header-line-number { + flex-shrink: 0; + width: 60px; + padding: 8px 10px; + text-align: right; + border-right: 1px solid var(--border); +} + +.header-samples-self, +.header-samples-cumulative { + flex-shrink: 0; + width: 90px; + padding: 8px 10px; + text-align: right; + border-right: 1px solid 
var(--border); +} + +.header-samples-self { + color: var(--heat-8); +} + +.header-samples-cumulative { + color: var(--accent); +} + +.header-content { + flex: 1; + padding: 8px 15px; +} + +/* Code Lines */ +.code-line { + position: relative; + display: flex; + min-height: 20px; + line-height: 20px; + font-size: 13px; + transition: background var(--transition-fast); + scroll-margin-top: calc(var(--topbar-height) + 50px); +} + +.code-line:hover { + filter: brightness(0.97); +} + +[data-theme="dark"] .code-line:hover { + filter: brightness(1.1); +} + +.line-number { + flex-shrink: 0; + width: 60px; + padding: 0 10px; + text-align: right; + color: var(--text-muted); + background: var(--bg-secondary); + border-right: 1px solid var(--border); + user-select: none; + transition: all var(--transition-fast); +} + +.line-number:hover { + background: var(--accent); + color: white; + cursor: pointer; +} + +.line-samples-self, +.line-samples-cumulative { + flex-shrink: 0; + width: 90px; + padding: 0 10px; + text-align: right; + background: var(--bg-secondary); + border-right: 1px solid var(--border); + font-weight: 600; + user-select: none; + font-size: 12px; +} + +.line-samples-self { + color: var(--heat-8); +} + +.line-samples-cumulative { + color: var(--accent); +} + +.line-content { + flex: 1; + padding: 0 15px; + white-space: pre; + overflow-x: auto; +} + +/* Scrollbar Styling */ +.line-content::-webkit-scrollbar { + height: 6px; +} + +.line-content::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} + +.line-content::-webkit-scrollbar-thumb:hover { + background: var(--text-muted); +} + +/* Navigation Buttons */ +.line-nav-buttons { + position: absolute; + right: 8px; + top: 50%; + transform: translateY(-50%); + display: flex; + gap: 4px; + align-items: center; +} + +.nav-btn { + padding: 2px 6px; + font-size: 12px; + font-weight: 500; + border: 1px solid var(--accent); + border-radius: 4px; + background: var(--bg-primary); + color: var(--accent); + cursor: pointer; + transition: all var(--transition-fast); + user-select: none; + line-height: 1; +} + +.nav-btn:hover:not(:disabled) { + background: var(--accent); + color: white; + transform: translateY(-1px); + box-shadow: var(--shadow-sm); +} + +.nav-btn:active:not(:disabled) { + transform: translateY(0); +} + +.nav-btn:disabled { + opacity: 0.3; + cursor: not-allowed; + color: var(--text-muted); + background: var(--bg-secondary); + border-color: var(--border); +} + +.nav-btn.caller { + color: var(--nav-caller); + border-color: var(--nav-caller); +} + +.nav-btn.callee { + color: var(--nav-callee); + border-color: var(--nav-callee); +} + +.nav-btn.caller:hover:not(:disabled) { + background: var(--nav-caller-hover); + color: white; +} + +.nav-btn.callee:hover:not(:disabled) { + background: var(--nav-callee-hover); + color: white; +} + +/* Highlighted target line */ +.code-line:target { + animation: highlight-line 2s ease-out; +} + +@keyframes highlight-line { + 0% { + background: rgba(255, 212, 59, 0.6) !important; + outline: 3px solid var(--python-gold); + outline-offset: -3px; + } + 50% { + background: rgba(255, 212, 59, 0.5) !important; + outline: 3px solid var(--python-gold); + outline-offset: -3px; + } + 100% { + background: inherit; + outline: 3px solid transparent; + outline-offset: -3px; + } +} + +/* Popup menu for multiple callees */ +.callee-menu { + position: absolute; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + box-shadow: var(--shadow-lg); + padding: 8px; + 
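/* Render above the sticky code header (z-index 100) and the code header row (z-index 50). */ +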
z-index: 1000; + min-width: 250px; + max-width: 400px; + max-height: 300px; + overflow-y: auto; +} + +.callee-menu-header { + font-weight: 600; + color: var(--text-primary); + margin-bottom: 8px; + padding-bottom: 8px; + border-bottom: 1px solid var(--border); + font-size: 13px; + font-family: var(--font-sans); +} + +.callee-menu-item { + padding: 8px; + margin: 4px 0; + border-radius: 6px; + cursor: pointer; + transition: background var(--transition-fast); + display: flex; + flex-direction: column; + gap: 4px; +} + +.callee-menu-item:hover { + background: var(--bg-secondary); +} + +.callee-menu-func { + font-weight: 500; + color: var(--accent); + font-size: 12px; +} + +.callee-menu-file { + font-size: 11px; + color: var(--text-muted); +} + +.count-badge { + display: inline-block; + background: var(--accent); + color: white; + font-size: 10px; + padding: 2px 6px; + border-radius: 4px; + font-weight: 600; + margin-left: 6px; +} + +/* Callee menu scrollbar */ +.callee-menu::-webkit-scrollbar { + width: 6px; +} + +.callee-menu::-webkit-scrollbar-track { + background: var(--bg-secondary); + border-radius: 3px; +} + +.callee-menu::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} + +/* -------------------------------------------------------------------------- + Scroll Minimap Marker + -------------------------------------------------------------------------- */ + +#scroll_marker { + position: fixed; + z-index: 1000; + right: 0; + top: 0; + width: 12px; + height: 100%; + background: var(--bg-secondary); + border-left: 1px solid var(--border); + pointer-events: none; +} + +#scroll_marker .marker { + position: absolute; + min-height: 3px; + width: 100%; + pointer-events: none; +} + +#scroll_marker .marker.cold { + background: var(--heat-1); +} + +#scroll_marker .marker.cool { + background: var(--heat-2); +} + +#scroll_marker .marker.mild { + background: var(--heat-3); +} + +#scroll_marker .marker.warm { + background: var(--heat-4); +} + +#scroll_marker .marker.hot { + background: var(--heat-5); +} + +#scroll_marker .marker.very-hot { + background: var(--heat-6); +} + +#scroll_marker .marker.intense { + background: var(--heat-7); +} + +#scroll_marker .marker.extreme { + background: var(--heat-8); +} + +/* -------------------------------------------------------------------------- + Responsive (Heatmap-specific) + -------------------------------------------------------------------------- */ + +@media (max-width: 1100px) { + .stats-summary { + grid-template-columns: repeat(2, 1fr); + } +} + +@media (max-width: 900px) { + .main-content { + padding: 16px; + } +} + +@media (max-width: 600px) { + .stats-summary { + grid-template-columns: 1fr; + } + + .file-stats .stats-grid { + grid-template-columns: repeat(2, 1fr); + } + + .legend-content { + flex-direction: column; + gap: 12px; + } + + .legend-gradient { + width: 100%; + max-width: none; + } +} diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.js b/Lib/profiling/sampling/_heatmap_assets/heatmap.js new file mode 100644 index 00000000000..5a7ff5dd61a --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.js @@ -0,0 +1,338 @@ +// Tachyon Profiler - Heatmap JavaScript +// Interactive features for the heatmap visualization +// Aligned with Flamegraph viewer design patterns + +// ============================================================================ +// State Management +// ============================================================================ + +let currentMenu = null; +let colorMode = 'self'; 
// 'self' or 'cumulative' - default to self +let coldCodeHidden = false; + +// ============================================================================ +// Theme Support +// ============================================================================ + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'light'; + const next = current === 'light' ? 'dark' : 'light'; + html.setAttribute('data-theme', next); + localStorage.setItem('heatmap-theme', next); + + // Update theme button icon + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = next === 'dark' ? '☼' : '☾'; // sun or moon + } + applyLineColors(); + + // Rebuild scroll marker with new theme colors + buildScrollMarker(); +} + +function restoreUIState() { + // Restore theme + const savedTheme = localStorage.getItem('heatmap-theme'); + if (savedTheme) { + document.documentElement.setAttribute('data-theme', savedTheme); + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = savedTheme === 'dark' ? '☼' : '☾'; + } + } +} + +// ============================================================================ +// Utility Functions +// ============================================================================ + +function createElement(tag, className, textContent = '') { + const el = document.createElement(tag); + if (className) el.className = className; + if (textContent) el.textContent = textContent; + return el; +} + +function calculateMenuPosition(buttonRect, menuWidth, menuHeight) { + const viewport = { width: window.innerWidth, height: window.innerHeight }; + const scroll = { + x: window.pageXOffset || document.documentElement.scrollLeft, + y: window.pageYOffset || document.documentElement.scrollTop + }; + + const left = buttonRect.right + menuWidth + 10 < viewport.width + ? buttonRect.right + scroll.x + 10 + : Math.max(scroll.x + 10, buttonRect.left + scroll.x - menuWidth - 10); + + const top = buttonRect.bottom + menuHeight + 10 < viewport.height + ? 
buttonRect.bottom + scroll.y + 5 + : Math.max(scroll.y + 10, buttonRect.top + scroll.y - menuHeight - 10); + + return { left, top }; +} + +// ============================================================================ +// Menu Management +// ============================================================================ + +function closeMenu() { + if (currentMenu) { + currentMenu.remove(); + currentMenu = null; + } +} + +function showNavigationMenu(button, items, title) { + closeMenu(); + + const menu = createElement('div', 'callee-menu'); + menu.appendChild(createElement('div', 'callee-menu-header', title)); + + items.forEach(linkData => { + const item = createElement('div', 'callee-menu-item'); + + const funcDiv = createElement('div', 'callee-menu-func'); + funcDiv.textContent = linkData.func; + + if (linkData.count !== undefined && linkData.count > 0) { + const countBadge = createElement('span', 'count-badge'); + countBadge.textContent = linkData.count.toLocaleString(); + countBadge.title = `${linkData.count.toLocaleString()} samples`; + funcDiv.appendChild(document.createTextNode(' ')); + funcDiv.appendChild(countBadge); + } + + item.appendChild(funcDiv); + item.appendChild(createElement('div', 'callee-menu-file', linkData.file)); + item.addEventListener('click', () => window.location.href = linkData.link); + menu.appendChild(item); + }); + + const pos = calculateMenuPosition(button.getBoundingClientRect(), 350, 300); + menu.style.left = `${pos.left}px`; + menu.style.top = `${pos.top}px`; + + document.body.appendChild(menu); + currentMenu = menu; +} + +// ============================================================================ +// Navigation +// ============================================================================ + +function handleNavigationClick(button, e) { + e.stopPropagation(); + + const navData = button.getAttribute('data-nav'); + if (navData) { + window.location.href = JSON.parse(navData).link; + return; + } + + const navMulti = button.getAttribute('data-nav-multi'); + if (navMulti) { + const items = JSON.parse(navMulti); + const title = button.classList.contains('caller') ? 
'Choose a caller:' : 'Choose a callee:'; + showNavigationMenu(button, items, title); + } +} + +function scrollToTargetLine() { + if (!window.location.hash) return; + const target = document.querySelector(window.location.hash); + if (target) { + target.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } +} + +// ============================================================================ +// Sample Count & Intensity +// ============================================================================ + +function getSampleCount(line) { + let text; + if (colorMode === 'self') { + text = line.querySelector('.line-samples-self')?.textContent.trim().replace(/,/g, ''); + } else { + text = line.querySelector('.line-samples-cumulative')?.textContent.trim().replace(/,/g, ''); + } + return parseInt(text) || 0; +} + +// ============================================================================ +// Scroll Minimap +// ============================================================================ + +function buildScrollMarker() { + const existing = document.getElementById('scroll_marker'); + if (existing) existing.remove(); + + if (document.body.scrollHeight <= window.innerHeight) return; + + const allLines = document.querySelectorAll('.code-line'); + const lines = Array.from(allLines).filter(line => line.style.display !== 'none'); + const markerScale = window.innerHeight / document.body.scrollHeight; + const lineHeight = Math.min(Math.max(3, window.innerHeight / lines.length), 10); + const maxSamples = Math.max(...Array.from(lines, getSampleCount)); + + const scrollMarker = createElement('div', ''); + scrollMarker.id = 'scroll_marker'; + + let prevLine = -99, lastMark, lastTop; + + lines.forEach((line, index) => { + const samples = getSampleCount(line); + if (samples === 0) return; + + const lineTop = Math.floor(line.offsetTop * markerScale); + const lineNumber = index + 1; + const intensityClass = maxSamples > 0 ? 
(intensityToClass(samples / maxSamples) || 'cold') : 'cold'; + + if (lineNumber === prevLine + 1 && lastMark?.classList.contains(intensityClass)) { + lastMark.style.height = `${lineTop + lineHeight - lastTop}px`; + } else { + lastMark = createElement('div', `marker ${intensityClass}`); + lastMark.style.height = `${lineHeight}px`; + lastMark.style.top = `${lineTop}px`; + scrollMarker.appendChild(lastMark); + lastTop = lineTop; + } + + prevLine = lineNumber; + }); + + document.body.appendChild(scrollMarker); +} + +function applyLineColors() { + const lines = document.querySelectorAll('.code-line'); + lines.forEach(line => { + let intensity; + if (colorMode === 'self') { + intensity = parseFloat(line.getAttribute('data-self-intensity')) || 0; + } else { + intensity = parseFloat(line.getAttribute('data-cumulative-intensity')) || 0; + } + + const color = intensityToColor(intensity); + line.style.background = color; + }); +} + +// ============================================================================ +// Toggle Controls +// ============================================================================ + +function updateToggleUI(toggleId, isOn) { + const toggle = document.getElementById(toggleId); + if (toggle) { + const track = toggle.querySelector('.toggle-track'); + const labels = toggle.querySelectorAll('.toggle-label'); + if (isOn) { + track.classList.add('on'); + labels[0].classList.remove('active'); + labels[1].classList.add('active'); + } else { + track.classList.remove('on'); + labels[0].classList.add('active'); + labels[1].classList.remove('active'); + } + } +} + +function toggleColdCode() { + coldCodeHidden = !coldCodeHidden; + applyHotFilter(); + updateToggleUI('toggle-cold', coldCodeHidden); + buildScrollMarker(); +} + +function applyHotFilter() { + const lines = document.querySelectorAll('.code-line'); + + lines.forEach(line => { + const selfSamples = line.querySelector('.line-samples-self')?.textContent.trim(); + const cumulativeSamples = line.querySelector('.line-samples-cumulative')?.textContent.trim(); + + let isCold; + if (colorMode === 'self') { + isCold = !selfSamples || selfSamples === ''; + } else { + isCold = !cumulativeSamples || cumulativeSamples === ''; + } + + if (isCold) { + line.style.display = coldCodeHidden ? 'none' : 'flex'; + } else { + line.style.display = 'flex'; + } + }); +} + +function toggleColorMode() { + colorMode = colorMode === 'self' ? 'cumulative' : 'self'; + applyLineColors(); + + updateToggleUI('toggle-color-mode', colorMode === 'cumulative'); + + if (coldCodeHidden) { + applyHotFilter(); + } + + buildScrollMarker(); +} + +// ============================================================================ +// Initialization +// ============================================================================ + +document.addEventListener('DOMContentLoaded', function() { + // Restore UI state (theme, etc.) 
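+ // Apply the saved theme before the first applyLineColors() call below; heat colors are resolved from the active theme's --heat-* CSS variables.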
+ restoreUIState(); + applyLineColors(); + + // Initialize navigation buttons + document.querySelectorAll('.nav-btn').forEach(button => { + button.addEventListener('click', e => handleNavigationClick(button, e)); + }); + + // Initialize line number permalink handlers + document.querySelectorAll('.line-number').forEach(lineNum => { + lineNum.style.cursor = 'pointer'; + lineNum.addEventListener('click', e => { + window.location.hash = `line-${e.target.textContent.trim()}`; + }); + }); + + // Initialize toggle buttons + const toggleColdBtn = document.getElementById('toggle-cold'); + if (toggleColdBtn) { + toggleColdBtn.addEventListener('click', toggleColdCode); + } + + const colorModeBtn = document.getElementById('toggle-color-mode'); + if (colorModeBtn) { + colorModeBtn.addEventListener('click', toggleColorMode); + } + + // Build scroll marker + setTimeout(buildScrollMarker, 200); + + // Setup scroll-to-line behavior + setTimeout(scrollToTargetLine, 100); +}); + +// Close menu when clicking outside +document.addEventListener('click', e => { + if (currentMenu && !currentMenu.contains(e.target) && !e.target.classList.contains('nav-btn')) { + closeMenu(); + } +}); + +// Handle hash changes +window.addEventListener('hashchange', () => setTimeout(scrollToTargetLine, 50)); + +// Rebuild scroll marker on resize +window.addEventListener('resize', buildScrollMarker); diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_index.js b/Lib/profiling/sampling/_heatmap_assets/heatmap_index.js new file mode 100644 index 00000000000..4ddacca5173 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_index.js @@ -0,0 +1,127 @@ +// Tachyon Profiler - Heatmap Index JavaScript +// Index page specific functionality + +// ============================================================================ +// Heatmap Bar Coloring +// ============================================================================ + +function applyHeatmapBarColors() { + const bars = document.querySelectorAll('.heatmap-bar[data-intensity]'); + bars.forEach(bar => { + const intensity = parseFloat(bar.getAttribute('data-intensity')) || 0; + const color = intensityToColor(intensity); + bar.style.backgroundColor = color; + }); +} + +// ============================================================================ +// Theme Support +// ============================================================================ + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'light'; + const next = current === 'light' ? 'dark' : 'light'; + html.setAttribute('data-theme', next); + localStorage.setItem('heatmap-theme', next); + + // Update theme button icon + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = next === 'dark' ? '☼' : '☾'; // sun or moon + } + + applyHeatmapBarColors(); +} + +function restoreUIState() { + // Restore theme + const savedTheme = localStorage.getItem('heatmap-theme'); + if (savedTheme) { + document.documentElement.setAttribute('data-theme', savedTheme); + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = savedTheme === 'dark' ? 
'☼' : '☾'; + } + } +} + +// ============================================================================ +// Type Section Toggle (stdlib, project, etc) +// ============================================================================ + +function toggleTypeSection(header) { + const section = header.parentElement; + const content = section.querySelector('.type-content'); + const icon = header.querySelector('.type-icon'); + + if (content.style.display === 'none') { + content.style.display = 'block'; + icon.textContent = '\u25BC'; + } else { + content.style.display = 'none'; + icon.textContent = '\u25B6'; + } +} + +// ============================================================================ +// Folder Toggle +// ============================================================================ + +function toggleFolder(header) { + const folder = header.parentElement; + const content = folder.querySelector('.folder-content'); + const icon = header.querySelector('.folder-icon'); + + if (content.style.display === 'none') { + content.style.display = 'block'; + icon.textContent = '\u25BC'; + folder.classList.remove('collapsed'); + } else { + content.style.display = 'none'; + icon.textContent = '\u25B6'; + folder.classList.add('collapsed'); + } +} + +// ============================================================================ +// Expand/Collapse All +// ============================================================================ + +function expandAll() { + // Expand all type sections + document.querySelectorAll('.type-section').forEach(section => { + const content = section.querySelector('.type-content'); + const icon = section.querySelector('.type-icon'); + content.style.display = 'block'; + icon.textContent = '\u25BC'; + }); + + // Expand all folders + document.querySelectorAll('.folder-node').forEach(folder => { + const content = folder.querySelector('.folder-content'); + const icon = folder.querySelector('.folder-icon'); + content.style.display = 'block'; + icon.textContent = '\u25BC'; + folder.classList.remove('collapsed'); + }); +} + +function collapseAll() { + document.querySelectorAll('.folder-node').forEach(folder => { + const content = folder.querySelector('.folder-content'); + const icon = folder.querySelector('.folder-icon'); + content.style.display = 'none'; + icon.textContent = '\u25B6'; + folder.classList.add('collapsed'); + }); +} + +// ============================================================================ +// Initialization +// ============================================================================ + +document.addEventListener('DOMContentLoaded', function() { + restoreUIState(); + applyHeatmapBarColors(); +}); diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_index_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_index_template.html new file mode 100644 index 00000000000..b71bd94c661 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_index_template.html @@ -0,0 +1,118 @@ + + + + + + Tachyon Profiler - Heatmap Report + + + +
+ +
+
+ Tachyon + + Heatmap Report +
+
+ +
+
+ + +
+ +
+
+
📄
+
+ + Files Profiled +
+
+
+
+
📊
+
+ + Total Snapshots +
+
+
+
+
+
+ + Duration +
+
+
+
+
+
+ + Samples/sec +
+
+
+
+
+
+
+ Error Rate +
+ +
+
+
+
+
+
+
+
+
💥
+ Missed Samples +
+ +
+
+
+
+
+
+ + +
+

Profiled Files

+
+ +
+ + +
+ +
+ +
+
+ + +
+ + Tachyon Profiler + + + Python Sampling Profiler + +
+
+ + + + diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html new file mode 100644 index 00000000000..d8b26adfb02 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html @@ -0,0 +1,96 @@ + + + + + + <!-- FILENAME --> - Heatmap + + + +
+ +
+
+ Tachyon + + +
+
+ Back to Index + +
+
+ + +
+
+
+
+
Self Samples
+
+
+
+
Cumulative
+
+
+
+
Lines Hit
+
+
+
%
+
% of Total
+
+
+
+
Max Self
+
+
+
+
Max Total
+
+
+
+ + +
+
+ Intensity: +
+
+ Cold + + Hot +
+
+ Self Time +
+ Total Time +
+
+ Show All +
+ Hot Only +
+
+
+ + +
+
+
Line
+
Self
+
Total
+
Code
+
+ +
+
+ + + + diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_shared.js b/Lib/profiling/sampling/_heatmap_assets/heatmap_shared.js new file mode 100644 index 00000000000..f44ebcff4ff --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_shared.js @@ -0,0 +1,40 @@ +// Tachyon Profiler - Shared Heatmap JavaScript +// Common utilities shared between index and file views + +// ============================================================================ +// Heat Level Mapping (Single source of truth for intensity thresholds) +// ============================================================================ + +// Maps intensity (0-1) to heat level (0-8). Level 0 = no heat, 1-8 = heat levels. +function intensityToHeatLevel(intensity) { + if (intensity <= 0) return 0; + if (intensity <= 0.125) return 1; + if (intensity <= 0.25) return 2; + if (intensity <= 0.375) return 3; + if (intensity <= 0.5) return 4; + if (intensity <= 0.625) return 5; + if (intensity <= 0.75) return 6; + if (intensity <= 0.875) return 7; + return 8; +} + +// Class names corresponding to heat levels 1-8 (used by scroll marker) +const HEAT_CLASS_NAMES = ['cold', 'cool', 'mild', 'warm', 'hot', 'very-hot', 'intense', 'extreme']; + +function intensityToClass(intensity) { + const level = intensityToHeatLevel(intensity); + return level === 0 ? null : HEAT_CLASS_NAMES[level - 1]; +} + +// ============================================================================ +// Color Mapping (Intensity to Heat Color) +// ============================================================================ + +function intensityToColor(intensity) { + const level = intensityToHeatLevel(intensity); + if (level === 0) { + return 'transparent'; + } + const rootStyle = getComputedStyle(document.documentElement); + return rootStyle.getPropertyValue(`--heat-${level}`).trim(); +} diff --git a/Lib/profiling/sampling/_shared_assets/base.css b/Lib/profiling/sampling/_shared_assets/base.css new file mode 100644 index 00000000000..54e3d78f8eb --- /dev/null +++ b/Lib/profiling/sampling/_shared_assets/base.css @@ -0,0 +1,369 @@ +/* ========================================================================== + Python Profiler - Shared CSS Foundation + Design system shared between Flamegraph and Heatmap viewers + ========================================================================== */ + +/* -------------------------------------------------------------------------- + CSS Variables & Theme System + -------------------------------------------------------------------------- */ + +:root { + /* Typography */ + --font-sans: "Source Sans Pro", "Lucida Grande", "Lucida Sans Unicode", + "Geneva", "Verdana", sans-serif; + --font-mono: 'SF Mono', 'Monaco', 'Consolas', 'Liberation Mono', monospace; + + /* Python brand colors (theme-independent) */ + --python-blue: #3776ab; + --python-blue-light: #4584bb; + --python-blue-lighter: #5592cc; + --python-gold: #ffd43b; + --python-gold-dark: #ffcd02; + --python-gold-light: #ffdc5c; + + /* Heat palette - defined per theme below */ + + /* Layout */ + --sidebar-width: 280px; + --sidebar-collapsed: 44px; + --topbar-height: 56px; + --statusbar-height: 32px; + + /* Transitions */ + --transition-fast: 0.15s ease; + --transition-normal: 0.25s ease; +} + +/* Light theme (default) */ +:root, [data-theme="light"] { + --bg-primary: #ffffff; + --bg-secondary: #f8f9fa; + --bg-tertiary: #e9ecef; + --border: #e9ecef; + --border-subtle: #f0f2f5; + + --text-primary: #2e3338; + --text-secondary: #5a6c7d; + --text-muted: #6f767e; + + 
--accent: #3776ab; + --accent-hover: #2d5aa0; + --accent-glow: rgba(55, 118, 171, 0.15); + + --shadow-sm: 0 1px 3px rgba(0, 0, 0, 0.08); + --shadow-md: 0 4px 12px rgba(0, 0, 0, 0.1); + --shadow-lg: 0 8px 24px rgba(0, 0, 0, 0.15); + + --header-gradient: linear-gradient(135deg, #3776ab 0%, #4584bb 100%); + + /* Light mode heat palette - blue to yellow to orange to red (cold to hot) */ + --heat-1: #7ba3d1; + --heat-2: #a8d0ef; + --heat-3: #d6e9f8; + --heat-4: #ffe6a8; + --heat-5: #ffd43b; + --heat-6: #ffb84d; + --heat-7: #ff9966; + --heat-8: #ff6347; + + /* Code view specific */ + --code-bg: #ffffff; + --code-bg-line: #f8f9fa; + --code-border: #e9ecef; + --code-text: #2e3338; + --code-text-muted: #8b949e; + --code-accent: #3776ab; + + /* Navigation colors */ + --nav-caller: #2563eb; + --nav-caller-hover: #1d4ed8; + --nav-callee: #dc2626; + --nav-callee-hover: #b91c1c; +} + +/* Dark theme */ +[data-theme="dark"] { + --bg-primary: #0d1117; + --bg-secondary: #161b22; + --bg-tertiary: #21262d; + --border: #30363d; + --border-subtle: #21262d; + + --text-primary: #e6edf3; + --text-secondary: #8b949e; + --text-muted: #757e8a; + + --accent: #58a6ff; + --accent-hover: #79b8ff; + --accent-glow: rgba(88, 166, 255, 0.15); + + --shadow-sm: 0 1px 3px rgba(0, 0, 0, 0.3); + --shadow-md: 0 4px 12px rgba(0, 0, 0, 0.4); + --shadow-lg: 0 8px 24px rgba(0, 0, 0, 0.5); + + --header-gradient: linear-gradient(135deg, #21262d 0%, #30363d 100%); + + /* Dark mode heat palette - dark blue to teal to yellow to orange (cold to hot) */ + --heat-1: #4a7ba7; + --heat-2: #5a9fa8; + --heat-3: #6ab5b5; + --heat-4: #7ec488; + --heat-5: #a0d878; + --heat-6: #c4de6a; + --heat-7: #f4d44d; + --heat-8: #ff6b35; + + /* Code view specific - dark mode */ + --code-bg: #0d1117; + --code-bg-line: #161b22; + --code-border: #30363d; + --code-text: #e6edf3; + --code-text-muted: #6e7681; + --code-accent: #58a6ff; + + /* Navigation colors - dark theme friendly */ + --nav-caller: #58a6ff; + --nav-caller-hover: #4184e4; + --nav-callee: #f87171; + --nav-callee-hover: #e53e3e; +} + +/* -------------------------------------------------------------------------- + Base Styles + -------------------------------------------------------------------------- */ + +*, *::before, *::after { + box-sizing: border-box; +} + +html, body { + margin: 0; + padding: 0; +} + +body { + font-family: var(--font-sans); + font-size: 14px; + line-height: 1.6; + color: var(--text-primary); + background: var(--bg-primary); + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + transition: background var(--transition-normal), color var(--transition-normal); +} + +/* -------------------------------------------------------------------------- + Layout Structure + -------------------------------------------------------------------------- */ + +.app-layout { + display: flex; + flex-direction: column; +} + +/* -------------------------------------------------------------------------- + Top Bar + -------------------------------------------------------------------------- */ + +.top-bar { + height: var(--topbar-height); + background: var(--header-gradient); + display: flex; + align-items: center; + padding: 0 16px; + gap: 16px; + flex-shrink: 0; + box-shadow: 0 2px 10px rgba(55, 118, 171, 0.25); + border-bottom: 2px solid var(--python-gold); +} + +/* Brand / Logo */ +.brand { + display: flex; + align-items: center; + gap: 12px; + color: white; + text-decoration: none; + flex-shrink: 0; +} + +.brand-logo { + display: flex; + align-items: center; + justify-content: 
center; + width: 28px; + height: 28px; + flex-shrink: 0; +} + +/* Style the inlined SVG/img inside brand-logo */ +.brand-logo svg, +.brand-logo img { + width: 28px; + height: 28px; + display: block; + object-fit: contain; + filter: drop-shadow(0 1px 2px rgba(0, 0, 0, 0.2)); +} + +.brand-info { + display: flex; + flex-direction: column; + line-height: 1.15; +} + +.brand-text { + font-weight: 700; + font-size: 16px; + letter-spacing: -0.3px; + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.15); +} + +.brand-subtitle { + font-weight: 500; + font-size: 10px; + opacity: 0.9; + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.brand-divider { + width: 1px; + height: 16px; + background: rgba(255, 255, 255, 0.3); +} + +/* Toolbar */ +.toolbar { + display: flex; + align-items: center; + gap: 6px; + margin-left: auto; +} + +.toolbar-btn { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + padding: 0; + font-size: 15px; + color: white; + background: rgba(255, 255, 255, 0.12); + border: 1px solid rgba(255, 255, 255, 0.18); + border-radius: 6px; + cursor: pointer; + transition: all var(--transition-fast); +} + +.toolbar-btn:hover { + background: rgba(255, 255, 255, 0.22); + border-color: rgba(255, 255, 255, 0.35); +} + +.toolbar-btn:active { + transform: scale(0.95); +} + +/* -------------------------------------------------------------------------- + Status Bar + -------------------------------------------------------------------------- */ + +.status-bar { + height: var(--statusbar-height); + background: var(--bg-secondary); + border-top: 1px solid var(--border); + display: flex; + align-items: center; + padding: 0 16px; + gap: 16px; + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-secondary); + flex-shrink: 0; +} + +.status-item { + display: flex; + align-items: center; + gap: 5px; +} + +.status-item::before { + content: ''; + width: 4px; + height: 4px; + background: var(--python-gold); + border-radius: 50%; +} + +.status-item:first-child::before { + display: none; +} + +.status-label { + color: var(--text-muted); +} + +.status-value { + color: var(--text-primary); + font-weight: 500; +} + +.status-value.accent { + color: var(--accent); + font-weight: 600; +} + +/* -------------------------------------------------------------------------- + Animations + -------------------------------------------------------------------------- */ + +@keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } +} + +@keyframes slideUp { + from { + opacity: 0; + transform: translateY(12px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +@keyframes shimmer { + 0% { left: -100%; } + 100% { left: 100%; } +} + +/* -------------------------------------------------------------------------- + Focus States (Accessibility) + -------------------------------------------------------------------------- */ + +button:focus-visible, +select:focus-visible, +input:focus-visible { + outline: 2px solid var(--python-gold); + outline-offset: 2px; +} + +/* -------------------------------------------------------------------------- + Shared Responsive + -------------------------------------------------------------------------- */ + +@media (max-width: 900px) { + .brand-subtitle { + display: none; + } +} + +@media (max-width: 600px) { + .toolbar-btn:not(.theme-toggle) { + display: none; + } +} diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py new file mode 100644 index 00000000000..0a082c0c638 --- /dev/null +++ 
b/Lib/profiling/sampling/cli.py @@ -0,0 +1,757 @@ +"""Command-line interface for the sampling profiler.""" + +import argparse +import os +import socket +import subprocess +import sys + +from .sample import sample, sample_live +from .pstats_collector import PstatsCollector +from .stack_collector import CollapsedStackCollector, FlamegraphCollector +from .heatmap_collector import HeatmapCollector +from .gecko_collector import GeckoCollector +from .constants import ( + PROFILING_MODE_ALL, + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + SORT_MODE_NSAMPLES, + SORT_MODE_TOTTIME, + SORT_MODE_CUMTIME, + SORT_MODE_SAMPLE_PCT, + SORT_MODE_CUMUL_PCT, + SORT_MODE_NSAMPLES_CUMUL, +) + +try: + from .live_collector import LiveStatsCollector +except ImportError: + LiveStatsCollector = None + + +class CustomFormatter( + argparse.ArgumentDefaultsHelpFormatter, + argparse.RawDescriptionHelpFormatter, +): + """Custom formatter that combines default values display with raw description formatting.""" + pass + + +_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. + +Commands: + run Run and profile a script or module + attach Attach to and profile a running process + +Examples: + # Run and profile a script + python -m profiling.sampling run script.py arg1 arg2 + + # Attach to a running process + python -m profiling.sampling attach 1234 + + # Live interactive mode for a script + python -m profiling.sampling run --live script.py + + # Live interactive mode for a running process + python -m profiling.sampling attach --live 1234 + +Use 'python -m profiling.sampling --help' for command-specific help.""" + + +# Constants for socket synchronization +_SYNC_TIMEOUT = 5.0 +_PROCESS_KILL_TIMEOUT = 2.0 +_READY_MESSAGE = b"ready" +_RECV_BUFFER_SIZE = 1024 + +# Format configuration +FORMAT_EXTENSIONS = { + "pstats": "pstats", + "collapsed": "txt", + "flamegraph": "html", + "gecko": "json", + "heatmap": "html", +} + +COLLECTOR_MAP = { + "pstats": PstatsCollector, + "collapsed": CollapsedStackCollector, + "flamegraph": FlamegraphCollector, + "gecko": GeckoCollector, + "heatmap": HeatmapCollector, +} + + +def _parse_mode(mode_string): + """Convert mode string to mode constant.""" + mode_map = { + "wall": PROFILING_MODE_WALL, + "cpu": PROFILING_MODE_CPU, + "gil": PROFILING_MODE_GIL, + } + return mode_map[mode_string] + + +def _run_with_sync(original_cmd, suppress_output=False): + """Run a command with socket-based synchronization and return the process.""" + # Create a TCP socket for synchronization with better socket options + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock: + # Set SO_REUSEADDR to avoid "Address already in use" errors + sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port + sync_port = sync_sock.getsockname()[1] + sync_sock.listen(1) + sync_sock.settimeout(_SYNC_TIMEOUT) + + # Get current working directory to preserve it + cwd = os.getcwd() + + # Build command using the sync coordinator + target_args = original_cmd[1:] # Remove python executable + cmd = ( + sys.executable, + "-m", + "profiling.sampling._sync_coordinator", + str(sync_port), + cwd, + ) + tuple(target_args) + + # Start the process with coordinator + # Suppress stdout/stderr if requested (for live mode) + popen_kwargs = {} + if suppress_output: + popen_kwargs["stdin"] = subprocess.DEVNULL + popen_kwargs["stdout"] = subprocess.DEVNULL + popen_kwargs["stderr"] = subprocess.DEVNULL + + process = 
subprocess.Popen(cmd, **popen_kwargs) + + try: + # Wait for ready signal with timeout + with sync_sock.accept()[0] as conn: + ready_signal = conn.recv(_RECV_BUFFER_SIZE) + + if ready_signal != _READY_MESSAGE: + raise RuntimeError( + f"Invalid ready signal received: {ready_signal!r}" + ) + + except socket.timeout: + # If we timeout, kill the process and raise an error + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + raise RuntimeError( + "Process failed to signal readiness within timeout" + ) + + return process + + +def _add_sampling_options(parser): + """Add sampling configuration options to a parser.""" + sampling_group = parser.add_argument_group("Sampling configuration") + sampling_group.add_argument( + "-i", + "--interval", + type=int, + default=100, + metavar="MICROSECONDS", + help="sampling interval", + ) + sampling_group.add_argument( + "-d", + "--duration", + type=int, + default=10, + metavar="SECONDS", + help="Sampling duration", + ) + sampling_group.add_argument( + "-a", + "--all-threads", + action="store_true", + help="Sample all threads in the process instead of just the main thread", + ) + sampling_group.add_argument( + "--realtime-stats", + action="store_true", + help="Print real-time sampling statistics (Hz, mean, min, max) during profiling", + ) + sampling_group.add_argument( + "--native", + action="store_true", + help='Include artificial "" frames to denote calls to non-Python code', + ) + sampling_group.add_argument( + "--no-gc", + action="store_false", + dest="gc", + help='Don\'t include artificial "" frames to denote active garbage collection', + ) + sampling_group.add_argument( + "--async-aware", + action="store_true", + help="Enable async-aware profiling (uses task-based stack reconstruction)", + ) + + +def _add_mode_options(parser): + """Add mode options to a parser.""" + mode_group = parser.add_argument_group("Mode options") + mode_group.add_argument( + "--mode", + choices=["wall", "cpu", "gil"], + default="wall", + help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), " + "gil (only samples when thread holds the GIL). Incompatible with --async-aware", + ) + mode_group.add_argument( + "--async-mode", + choices=["running", "all"], + default="running", + help='Async profiling mode: "running" (only running task) ' + 'or "all" (all tasks including waiting). 
Requires --async-aware', + ) + + +def _add_format_options(parser): + """Add output format options to a parser.""" + output_group = parser.add_argument_group("Output options") + format_group = output_group.add_mutually_exclusive_group() + format_group.add_argument( + "--pstats", + action="store_const", + const="pstats", + dest="format", + help="Generate pstats output (default)", + ) + format_group.add_argument( + "--collapsed", + action="store_const", + const="collapsed", + dest="format", + help="Generate collapsed stack traces for flamegraphs", + ) + format_group.add_argument( + "--flamegraph", + action="store_const", + const="flamegraph", + dest="format", + help="Generate interactive HTML flamegraph visualization", + ) + format_group.add_argument( + "--gecko", + action="store_const", + const="gecko", + dest="format", + help="Generate Gecko format for Firefox Profiler", + ) + format_group.add_argument( + "--heatmap", + action="store_const", + const="heatmap", + dest="format", + help="Generate interactive HTML heatmap visualization with line-level sample counts", + ) + parser.set_defaults(format="pstats") + + output_group.add_argument( + "-o", + "--output", + dest="outfile", + help="Output path (default: stdout for pstats, auto-generated for others). " + "For heatmap: directory name (default: heatmap_PID)", + ) + + +def _add_pstats_options(parser): + """Add pstats-specific display options to a parser.""" + pstats_group = parser.add_argument_group("pstats format options") + pstats_group.add_argument( + "--sort", + choices=[ + "nsamples", + "tottime", + "cumtime", + "sample-pct", + "cumul-pct", + "nsamples-cumul", + "name", + ], + default=None, + help="Sort order for pstats output (default: nsamples)", + ) + pstats_group.add_argument( + "-l", + "--limit", + type=int, + default=None, + help="Limit the number of rows in the output (default: 15)", + ) + pstats_group.add_argument( + "--no-summary", + action="store_true", + help="Disable the summary section in the pstats output", + ) + + +def _sort_to_mode(sort_choice): + """Convert sort choice string to SORT_MODE constant.""" + sort_map = { + "nsamples": SORT_MODE_NSAMPLES, + "tottime": SORT_MODE_TOTTIME, + "cumtime": SORT_MODE_CUMTIME, + "sample-pct": SORT_MODE_SAMPLE_PCT, + "cumul-pct": SORT_MODE_CUMUL_PCT, + "nsamples-cumul": SORT_MODE_NSAMPLES_CUMUL, + "name": -1, + } + return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) + + +def _create_collector(format_type, interval, skip_idle): + """Create the appropriate collector based on format type. + + Args: + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko') + interval: Sampling interval in microseconds + skip_idle: Whether to skip idle samples + + Returns: + A collector instance of the appropriate type + """ + collector_class = COLLECTOR_MAP.get(format_type) + if collector_class is None: + raise ValueError(f"Unknown format: {format_type}") + + # Gecko format never skips idle (it needs both GIL and CPU data) + if format_type == "gecko": + skip_idle = False + + return collector_class(interval, skip_idle=skip_idle) + + +def _generate_output_filename(format_type, pid): + """Generate output filename based on format and PID. 
+ + Args: + format_type: The output format + pid: Process ID + + Returns: + Generated filename + """ + extension = FORMAT_EXTENSIONS.get(format_type, "txt") + # For heatmap, use cleaner directory name without extension + if format_type == "heatmap": + return f"heatmap_{pid}" + return f"{format_type}.{pid}.{extension}" + + +def _handle_output(collector, args, pid, mode): + """Handle output for the collector based on format and arguments. + + Args: + collector: The collector instance with profiling data + args: Parsed command-line arguments + pid: Process ID (for generating filenames) + mode: Profiling mode used + """ + if args.format == "pstats": + if args.outfile: + collector.export(args.outfile) + else: + # Print to stdout with defaults applied + sort_choice = args.sort if args.sort is not None else "nsamples" + limit = args.limit if args.limit is not None else 15 + sort_mode = _sort_to_mode(sort_choice) + collector.print_stats( + sort_mode, limit, not args.no_summary, mode + ) + else: + # Export to file + filename = args.outfile or _generate_output_filename(args.format, pid) + collector.export(filename) + + +def _validate_args(args, parser): + """Validate format-specific options and live mode requirements. + + Args: + args: Parsed command-line arguments + parser: ArgumentParser instance for error reporting + """ + # Check if live mode is available + if hasattr(args, 'live') and args.live and LiveStatsCollector is None: + parser.error( + "Live mode requires the curses module, which is not available." + ) + + # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads + if args.async_aware: + issues = [] + if args.native: + issues.append("--native") + if not args.gc: + issues.append("--no-gc") + if hasattr(args, 'mode') and args.mode != "wall": + issues.append(f"--mode={args.mode}") + if hasattr(args, 'all_threads') and args.all_threads: + issues.append("--all-threads") + if issues: + parser.error( + f"Options {', '.join(issues)} are incompatible with --async-aware. " + "Async-aware profiling uses task-based stack reconstruction." + ) + + # --async-mode requires --async-aware + if hasattr(args, 'async_mode') and args.async_mode != "running" and not args.async_aware: + parser.error("--async-mode requires --async-aware to be enabled.") + + # Live mode is incompatible with format options + if hasattr(args, 'live') and args.live: + if args.format != "pstats": + format_flag = f"--{args.format}" + parser.error( + f"--live is incompatible with {format_flag}. Live mode uses a TUI interface." + ) + + # Live mode is also incompatible with pstats-specific options + issues = [] + if args.sort is not None: + issues.append("--sort") + if args.limit is not None: + issues.append("--limit") + if args.no_summary: + issues.append("--no-summary") + + if issues: + parser.error( + f"Options {', '.join(issues)} are incompatible with --live. " + "Live mode uses a TUI interface with its own controls." + ) + return + + # Validate gecko mode doesn't use non-wall mode + if args.format == "gecko" and args.mode != "wall": + parser.error( + "--mode option is incompatible with --gecko. " + "Gecko format automatically includes both GIL-holding and CPU status analysis." 
+ ) + + # Validate pstats-specific options are only used with pstats format + if args.format != "pstats": + issues = [] + if args.sort is not None: + issues.append("--sort") + if args.limit is not None: + issues.append("--limit") + if args.no_summary: + issues.append("--no-summary") + + if issues: + format_flag = f"--{args.format}" + parser.error( + f"Options {', '.join(issues)} are only valid with --pstats, not {format_flag}" + ) + + +def main(): + """Main entry point for the CLI.""" + # Create the main parser + parser = argparse.ArgumentParser( + description=_HELP_DESCRIPTION, + formatter_class=CustomFormatter, + ) + + # Create subparsers for commands + subparsers = parser.add_subparsers( + dest="command", required=True, help="Command to run" + ) + + # === RUN COMMAND === + run_parser = subparsers.add_parser( + "run", + help="Run and profile a script or module", + formatter_class=CustomFormatter, + description="""Run and profile a Python script or module + +Examples: + # Run and profile a module + python -m profiling.sampling run -m mymodule arg1 arg2 + + # Generate flamegraph from a script + python -m profiling.sampling run --flamegraph -o output.html script.py + + # Profile with custom interval and duration + python -m profiling.sampling run -i 50 -d 30 script.py + + # Save collapsed stacks to file + python -m profiling.sampling run --collapsed -o stacks.txt script.py + + # Live interactive mode for a script + python -m profiling.sampling run --live script.py""", + ) + run_parser.add_argument( + "-m", + "--module", + action="store_true", + help="Run target as a module (like python -m)", + ) + run_parser.add_argument( + "target", + help="Script file or module name to profile", + ) + run_parser.add_argument( + "args", + nargs=argparse.REMAINDER, + help="Arguments to pass to the script or module", + ) + run_parser.add_argument( + "--live", + action="store_true", + help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)", + ) + _add_sampling_options(run_parser) + _add_mode_options(run_parser) + _add_format_options(run_parser) + _add_pstats_options(run_parser) + + # === ATTACH COMMAND === + attach_parser = subparsers.add_parser( + "attach", + help="Attach to and profile a running process", + formatter_class=CustomFormatter, + description="""Attach to a running process and profile it + +Examples: + # Profile all threads, sort by total time + python -m profiling.sampling attach -a --sort tottime 1234 + + # Live interactive mode for a running process + python -m profiling.sampling attach --live 1234""", + ) + attach_parser.add_argument( + "pid", + type=int, + help="Process ID to attach to", + ) + attach_parser.add_argument( + "--live", + action="store_true", + help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)", + ) + _add_sampling_options(attach_parser) + _add_mode_options(attach_parser) + _add_format_options(attach_parser) + _add_pstats_options(attach_parser) + + # Parse arguments + args = parser.parse_args() + + # Validate arguments + _validate_args(args, parser) + + # Command dispatch table + command_handlers = { + "run": _handle_run, + "attach": _handle_attach, + } + + # Execute the appropriate command + handler = command_handlers.get(args.command) + if handler: + handler(args) + else: + parser.error(f"Unknown command: {args.command}") + + +def _handle_attach(args): + """Handle the 'attach' command.""" + # Check if live mode is requested + if args.live: + _handle_live_attach(args, args.pid) + return + + # Use 
PROFILING_MODE_ALL for gecko format + mode = ( + PROFILING_MODE_ALL + if args.format == "gecko" + else _parse_mode(args.mode) + ) + + # Determine skip_idle based on mode + skip_idle = ( + mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False + ) + + # Create the appropriate collector + collector = _create_collector(args.format, args.interval, skip_idle) + + # Sample the process + collector = sample( + args.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + async_aware=args.async_mode if args.async_aware else None, + native=args.native, + gc=args.gc, + ) + + # Handle output + _handle_output(collector, args, args.pid, mode) + + +def _handle_run(args): + """Handle the 'run' command.""" + # Check if live mode is requested + if args.live: + _handle_live_run(args) + return + + # Build the command to run + if args.module: + cmd = (sys.executable, "-m", args.target, *args.args) + else: + cmd = (sys.executable, args.target, *args.args) + + # Run with synchronization + process = _run_with_sync(cmd, suppress_output=False) + + # Use PROFILING_MODE_ALL for gecko format + mode = ( + PROFILING_MODE_ALL + if args.format == "gecko" + else _parse_mode(args.mode) + ) + + # Determine skip_idle based on mode + skip_idle = ( + mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False + ) + + # Create the appropriate collector + collector = _create_collector(args.format, args.interval, skip_idle) + + # Profile the subprocess + try: + collector = sample( + process.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + async_aware=args.async_mode if args.async_aware else None, + native=args.native, + gc=args.gc, + ) + + # Handle output + _handle_output(collector, args, process.pid, mode) + finally: + # Clean up the subprocess + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +def _handle_live_attach(args, pid): + """Handle live mode for an existing process.""" + mode = _parse_mode(args.mode) + + # Determine skip_idle based on mode + skip_idle = mode != PROFILING_MODE_WALL + + # Create live collector with default settings + collector = LiveStatsCollector( + args.interval, + skip_idle=skip_idle, + sort_by="tottime", # Default initial sort + limit=20, # Default limit + pid=pid, + mode=mode, + async_aware=args.async_mode if args.async_aware else None, + ) + + # Sample in live mode + sample_live( + pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + async_aware=args.async_mode if args.async_aware else None, + native=args.native, + gc=args.gc, + ) + + +def _handle_live_run(args): + """Handle live mode for running a script/module.""" + # Build the command to run + if args.module: + cmd = (sys.executable, "-m", args.target, *args.args) + else: + cmd = (sys.executable, args.target, *args.args) + + # Run with synchronization, suppressing output for live mode + process = _run_with_sync(cmd, suppress_output=True) + + mode = _parse_mode(args.mode) + + # Determine skip_idle based on mode + skip_idle = mode != PROFILING_MODE_WALL + + # Create live collector with default settings + collector = LiveStatsCollector( + args.interval, + skip_idle=skip_idle, + sort_by="tottime", # Default initial sort + limit=20, # Default limit + pid=process.pid, 
+ mode=mode, + async_aware=args.async_mode if args.async_aware else None, + ) + + # Profile the subprocess in live mode + try: + sample_live( + process.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + async_aware=args.async_mode if args.async_aware else None, + native=args.native, + gc=args.gc, + ) + finally: + # Clean up the subprocess + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +if __name__ == "__main__": + main() diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index 3c2325ef772..f63ea0afd8a 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,26 +1,30 @@ from abc import ABC, abstractmethod +from .constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_GIL_REQUESTED, + THREAD_STATUS_UNKNOWN, +) -# Thread status flags try: - from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED + from _remote_debugging import FrameInfo except ImportError: - # Fallback for tests or when module is not available - THREAD_STATUS_HAS_GIL = (1 << 0) - THREAD_STATUS_ON_CPU = (1 << 1) - THREAD_STATUS_UNKNOWN = (1 << 2) - THREAD_STATUS_GIL_REQUESTED = (1 << 3) + # Fallback definition if _remote_debugging is not available + FrameInfo = None class Collector(ABC): @abstractmethod def collect(self, stack_frames): """Collect profiling data from stack frames.""" + def collect_failed_sample(self): + """Collect data about a failed sample attempt.""" + @abstractmethod def export(self, filename): """Export collected data to a file.""" def _iter_all_frames(self, stack_frames, skip_idle=False): - """Iterate over all frame stacks from all interpreters and threads.""" for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: # skip_idle now means: skip if thread is not actively running @@ -34,3 +38,172 @@ def _iter_all_frames(self, stack_frames, skip_idle=False): frames = thread_info.frame_info if frames: yield frames, thread_info.thread_id + + def _iter_async_frames(self, awaited_info_list): + # Phase 1: Index tasks and build parent relationships with pre-computed selection + task_map, child_to_parent, all_task_ids, all_parent_ids = self._build_task_graph(awaited_info_list) + + # Phase 2: Find leaf tasks (tasks not awaited by anyone) + leaf_task_ids = self._find_leaf_tasks(all_task_ids, all_parent_ids) + + # Phase 3: Build linear stacks from each leaf to root (optimized - no sorting!) 
+ yield from self._build_linear_stacks(leaf_task_ids, task_map, child_to_parent) + + def _build_task_graph(self, awaited_info_list): + task_map = {} + child_to_parent = {} # Maps child_id -> (selected_parent_id, parent_count) + all_task_ids = set() + all_parent_ids = set() # Track ALL parent IDs for leaf detection + + for awaited_info in awaited_info_list: + thread_id = awaited_info.thread_id + for task_info in awaited_info.awaited_by: + task_id = task_info.task_id + task_map[task_id] = (task_info, thread_id) + all_task_ids.add(task_id) + + # Pre-compute selected parent and count for optimization + if task_info.awaited_by: + parent_ids = [p.task_name for p in task_info.awaited_by] + parent_count = len(parent_ids) + # Track ALL parents for leaf detection + all_parent_ids.update(parent_ids) + # Use min() for O(n) instead of sorted()[0] which is O(n log n) + selected_parent = min(parent_ids) if parent_count > 1 else parent_ids[0] + child_to_parent[task_id] = (selected_parent, parent_count) + + return task_map, child_to_parent, all_task_ids, all_parent_ids + + def _find_leaf_tasks(self, all_task_ids, all_parent_ids): + # Leaves are tasks that are not parents of any other task + return all_task_ids - all_parent_ids + + def _build_linear_stacks(self, leaf_task_ids, task_map, child_to_parent): + for leaf_id in leaf_task_ids: + frames = [] + visited = set() + current_id = leaf_id + thread_id = None + + # Follow the single parent chain from leaf to root + while current_id is not None: + # Cycle detection + if current_id in visited: + break + visited.add(current_id) + + # Check if task exists in task_map + if current_id not in task_map: + break + + task_info, tid = task_map[current_id] + + # Set thread_id from first task + if thread_id is None: + thread_id = tid + + # Add all frames from all coroutines in this task + if task_info.coroutine_stack: + for coro_info in task_info.coroutine_stack: + for frame in coro_info.call_stack: + frames.append(frame) + + # Get pre-computed parent info (no sorting needed!) + parent_info = child_to_parent.get(current_id) + + # Add task boundary marker with parent count annotation if multiple parents + task_name = task_info.task_name or "Task-" + str(task_info.task_id) + if parent_info: + selected_parent, parent_count = parent_info + if parent_count > 1: + task_name = f"{task_name} ({parent_count} parents)" + frames.append(FrameInfo(("", 0, task_name))) + current_id = selected_parent + else: + # Root task - no parent + frames.append(FrameInfo(("", 0, task_name))) + current_id = None + + # Yield the complete stack if we collected any frames + if frames and thread_id is not None: + yield frames, thread_id, leaf_id + + def _is_gc_frame(self, frame): + if isinstance(frame, tuple): + funcname = frame[2] if len(frame) >= 3 else "" + else: + funcname = getattr(frame, "funcname", "") + + return "" in funcname or "gc_collect" in funcname + + def _collect_thread_status_stats(self, stack_frames): + """Collect aggregate and per-thread status statistics from a sample. + + Returns: + tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats) + - aggregate_status_counts: dict with has_gil, on_cpu, etc. 
+ - has_gc_frame: bool indicating if any thread has GC frames + - per_thread_stats: dict mapping thread_id to per-thread counts + """ + status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + has_gc_frame = False + per_thread_stats = {} + + for interpreter_info in stack_frames: + threads = getattr(interpreter_info, "threads", []) + for thread_info in threads: + status_counts["total"] += 1 + + # Track thread status using bit flags + status_flags = getattr(thread_info, "status", 0) + + if status_flags & THREAD_STATUS_HAS_GIL: + status_counts["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + status_counts["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + status_counts["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + status_counts["unknown"] += 1 + + # Track per-thread statistics + thread_id = getattr(thread_info, "thread_id", None) + if thread_id is not None: + if thread_id not in per_thread_stats: + per_thread_stats[thread_id] = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + "gc_samples": 0, + } + + thread_stats = per_thread_stats[thread_id] + thread_stats["total"] += 1 + + if status_flags & THREAD_STATUS_HAS_GIL: + thread_stats["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + thread_stats["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + thread_stats["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + thread_stats["unknown"] += 1 + + # Check for GC frames in this thread + frames = getattr(thread_info, "frame_info", None) + if frames: + for frame in frames: + if self._is_gc_frame(frame): + thread_stats["gc_samples"] += 1 + has_gc_frame = True + break + + return status_counts, has_gc_frame, per_thread_stats diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py new file mode 100644 index 00000000000..be2ae60a88f --- /dev/null +++ b/Lib/profiling/sampling/constants.py @@ -0,0 +1,30 @@ +"""Constants for the sampling profiler.""" + +# Profiling mode constants +PROFILING_MODE_WALL = 0 +PROFILING_MODE_CPU = 1 +PROFILING_MODE_GIL = 2 +PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks + +# Sort mode constants +SORT_MODE_NSAMPLES = 0 +SORT_MODE_TOTTIME = 1 +SORT_MODE_CUMTIME = 2 +SORT_MODE_SAMPLE_PCT = 3 +SORT_MODE_CUMUL_PCT = 4 +SORT_MODE_NSAMPLES_CUMUL = 5 + +# Thread status flags +try: + from _remote_debugging import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + ) +except ImportError: + # Fallback for tests or when module is not available + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_UNKNOWN = (1 << 2) + THREAD_STATUS_GIL_REQUESTED = (1 << 3) diff --git a/Lib/profiling/sampling/flamegraph.css b/Lib/profiling/sampling/flamegraph.css deleted file mode 100644 index 67754ca609a..00000000000 --- a/Lib/profiling/sampling/flamegraph.css +++ /dev/null @@ -1,488 +0,0 @@ -body { - font-family: - "Source Sans Pro", "Lucida Grande", "Lucida Sans Unicode", "Geneva", - "Verdana", sans-serif; - margin: 0; - padding: 0; - background: #ffffff; - color: #2e3338; - line-height: 1.6; -} - -.header { - background: linear-gradient(135deg, #3776ab 0%, #4584bb 100%); - color: white; - padding: 32px 0; - box-shadow: 0 2px 10px rgba(55, 118, 171, 0.2); -} - -.header-content { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; - display: flex; - flex-direction: column; - align-items: center; - justify-content: 
center; - text-align: center; - gap: 20px; -} - -.python-logo { - width: auto; - height: 72px; - margin-bottom: 12px; /* tighter spacing to avoid visual gap */ - flex-shrink: 0; - display: flex; - align-items: center; - justify-content: center; -} - -.python-logo img { - height: 72px; - width: auto; - display: block; /* avoid baseline alignment issues */ - vertical-align: middle; - /* subtle shadow that does not affect layout */ - filter: drop-shadow(0 2px 6px rgba(0, 0, 0, 0.1)); -} - -.header-text h1 { - margin: 0; - font-size: 2.5em; - font-weight: 600; - color: white; - text-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); -} - -.header-text .subtitle { - margin: 8px 0 0 0; - font-size: 1.1em; - color: rgba(255, 255, 255, 0.9); - font-weight: 300; -} - -.header-search { - width: 100%; - max-width: 500px; -} - -.header-search #search-input { - width: 100%; - padding: 12px 20px; - border: 2px solid rgba(255, 255, 255, 0.2); - border-radius: 25px; - font-size: 16px; - font-family: inherit; - background: rgba(255, 255, 255, 0.95); - color: #2e3338; - transition: all 0.3s ease; - box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); - backdrop-filter: blur(10px); -} - -.header-search #search-input:focus { - outline: none; - border-color: rgba(255, 255, 255, 0.8); - background: rgba(255, 255, 255, 1); - box-shadow: 0 6px 20px rgba(0, 0, 0, 0.15); - transform: translateY(-2px); -} - -.header-search #search-input::placeholder { - color: #6c757d; -} - -.stats-section { - background: #ffffff; - padding: 24px 0; - border-bottom: 1px solid #e9ecef; -} - -.stats-container { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 20px; -} - -.stat-card { - background: #ffffff; - border: 1px solid #e9ecef; - border-radius: 12px; - padding: 20px; - display: flex; - align-items: flex-start; - gap: 16px; - box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06); - transition: all 0.2s ease; - min-height: 120px; -} - -.stat-card:hover { - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1); - transform: translateY(-2px); -} - -.stat-icon { - font-size: 32px; - width: 56px; - height: 56px; - display: flex; - align-items: center; - justify-content: center; - background: linear-gradient(135deg, #3776ab 0%, #4584bb 100%); - border-radius: 50%; - flex-shrink: 0; - box-shadow: 0 2px 8px rgba(55, 118, 171, 0.3); -} - -.stat-content { - flex: 1; -} - -.stat-label { - font-size: 14px; - color: #5a6c7d; - font-weight: 500; - margin-bottom: 4px; - text-transform: uppercase; - letter-spacing: 0.5px; -} - -.stat-value { - font-size: 16px; - font-weight: 700; - color: #2e3338; - line-height: 1.3; - margin-bottom: 4px; - word-break: break-word; - overflow-wrap: break-word; -} - -.stat-file { - font-size: 12px; - color: #8b949e; - font-weight: 400; - margin-bottom: 2px; - font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; - word-break: break-word; - overflow-wrap: break-word; -} - -.stat-detail { - font-size: 12px; - color: #5a6c7d; - font-weight: 400; - line-height: 1.4; - font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; - word-break: break-word; - overflow-wrap: break-word; -} - -.controls { - background: #f8f9fa; - border-bottom: 1px solid #e9ecef; - padding: 20px 0; - text-align: center; -} - -.controls-content { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; - text-align: center; -} - - -.controls button { - background: #3776ab; - color: white; - border: none; - padding: 12px 24px; - margin: 0 8px; - border-radius: 6px; - cursor: pointer; - font-weight: 600; - 
font-size: 14px; - font-family: inherit; - transition: all 0.2s ease; - box-shadow: 0 2px 4px rgba(55, 118, 171, 0.2); -} - -.controls button:hover { - background: #2d5aa0; - transform: translateY(-1px); - box-shadow: 0 4px 8px rgba(55, 118, 171, 0.3); -} - -.controls button.secondary { - background: #ffd43b; - color: #2e3338; -} - -.controls button.secondary:hover { - background: #ffcd02; -} - -.thread-filter-wrapper { - display: none; - align-items: center; - margin-left: 16px; - background: white; - border-radius: 6px; - padding: 4px 8px 4px 12px; - border: 2px solid #3776ab; - transition: all 0.2s ease; -} - -.thread-filter-wrapper:hover { - border-color: #2d5aa0; - box-shadow: 0 2px 6px rgba(55, 118, 171, 0.2); -} - -.thread-filter-label { - color: #3776ab; - font-size: 14px; - font-weight: 600; - margin-right: 8px; - display: flex; - align-items: center; -} - -.thread-filter-select { - background: transparent; - color: #2e3338; - border: none; - padding: 4px 24px 4px 4px; - font-size: 14px; - font-weight: 600; - cursor: pointer; - min-width: 120px; - font-family: inherit; - appearance: none; - -webkit-appearance: none; - -moz-appearance: none; - background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='%233776ab' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e"); - background-repeat: no-repeat; - background-position: right 4px center; - background-size: 16px; -} - -.thread-filter-select:focus { - outline: none; -} - -.thread-filter-select:hover { - color: #3776ab; -} - -.thread-filter-select option { - padding: 8px; - background: white; - color: #2e3338; - font-weight: normal; -} - -#chart { - width: 100%; - height: calc(100vh - 160px); - overflow: hidden; - background: #ffffff; - padding: 0 40px; -} - -.d3-flame-graph rect { - /* Prefer selector specificity instead of !important */ - stroke: rgba(55, 118, 171, 0.3); - stroke-width: 1px; - cursor: pointer; - transition: all 0.1s ease; -} - -.d3-flame-graph rect:hover { - stroke: #3776ab; - stroke-width: 2px; - filter: brightness(1.05); -} - -.d3-flame-graph text { - /* Ensure labels use our font without !important */ - font-family: "Source Sans Pro", sans-serif; - font-size: 12px; - font-weight: 500; - fill: #2e3338; - pointer-events: none; -} - -.info-panel { - position: fixed; - bottom: 24px; - left: 84px; /* Leave space for the button */ - background: white; - padding: 24px; - border-radius: 8px; - border: 1px solid #e9ecef; - font-size: 14px; - max-width: 280px; - box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); - z-index: 1000; - display: none; -} - -.info-panel h3 { - margin: 0 0 16px 0; - color: #3776ab; - font-weight: 600; - font-size: 16px; - border-bottom: 2px solid #ffd43b; - padding-bottom: 8px; -} - -.info-panel p { - margin: 12px 0; - color: #5a6c7d; - line-height: 1.5; -} - -.info-panel strong { - color: #3776ab; -} - -#show-info-btn { - position: fixed; - bottom: 32px; - left: 32px; - z-index: 1100; - width: 44px; - height: 44px; - border-radius: 50%; - background: #3776ab; - color: white; - border: none; - font-size: 24px; - box-shadow: 0 2px 8px rgba(55, 118, 171, 0.15); - cursor: pointer; - display: flex; - align-items: center; - justify-content: center; - transition: background 0.2s; -} - -#show-info-btn:hover { - background: #2d5aa0; -} - -#close-info-btn { - position: absolute; - top: 8px; - right: 12px; - background: none; - border: none; - font-size: 20px; 
- cursor: pointer; - color: #3776ab; -} - -@media (max-width: 600px) { - .python-logo { height: 48px; } - .python-logo img { height: 48px; } - #show-info-btn { - left: 8px; - bottom: 8px; - } - .info-panel { - left: 60px; /* Still leave space for button */ - bottom: 8px; - max-width: 90vw; - } -} - -.legend-panel { - position: fixed; - top: 24px; - left: 24px; - background: white; - padding: 24px; - border-radius: 8px; - border: 1px solid #e9ecef; - font-size: 14px; - max-width: 320px; - box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); - display: none; - z-index: 1001; -} - -.legend-panel h3 { - margin: 0 0 20px 0; - color: #3776ab; - font-weight: 600; - font-size: 18px; - text-align: center; - border-bottom: 2px solid #ffd43b; - padding-bottom: 8px; -} - -.legend-item { - display: flex; - align-items: center; - margin: 12px 0; - padding: 10px; - border-radius: 6px; - background: #f8f9fa; - border: 1px solid #e9ecef; -} - -.legend-color { - width: 28px; - height: 18px; - border-radius: 4px; - margin-right: 16px; - border: 1px solid rgba(0, 0, 0, 0.1); - flex-shrink: 0; -} - -.legend-label { - color: #2e3338; - font-weight: 600; - flex: 1; -} - -.legend-description { - color: #5a6c7d; - font-size: 12px; - margin-top: 2px; - font-weight: 400; -} - -.chart-container { - background: #ffffff; - margin: 0; - padding: 12px 0; -} - -/* Tooltip overflow fixes */ -.python-tooltip { - max-width: 500px !important; - word-wrap: break-word !important; - overflow-wrap: break-word !important; - box-sizing: border-box !important; -} - -/* Responsive tooltip adjustments */ -@media (max-width: 768px) { - .python-tooltip { - max-width: calc(100vw - 40px) !important; - max-height: calc(100vh - 80px) !important; - overflow-y: auto !important; - } -} - -@media (max-width: 480px) { - .python-tooltip { - max-width: calc(100vw - 20px) !important; - font-size: 12px !important; - } -} diff --git a/Lib/profiling/sampling/flamegraph.js b/Lib/profiling/sampling/flamegraph.js deleted file mode 100644 index 670ca22d442..00000000000 --- a/Lib/profiling/sampling/flamegraph.js +++ /dev/null @@ -1,646 +0,0 @@ -const EMBEDDED_DATA = {{FLAMEGRAPH_DATA}}; - -// Global string table for resolving string indices -let stringTable = []; -let originalData = null; -let currentThreadFilter = 'all'; - -// Function to resolve string indices to actual strings -function resolveString(index) { - if (typeof index === 'number' && index >= 0 && index < stringTable.length) { - return stringTable[index]; - } - // Fallback for non-indexed strings or invalid indices - return String(index); -} - -// Function to recursively resolve all string indices in flamegraph data -function resolveStringIndices(node) { - if (!node) return node; - - // Create a copy to avoid mutating the original - const resolved = { ...node }; - - // Resolve string fields - if (typeof resolved.name === 'number') { - resolved.name = resolveString(resolved.name); - } - if (typeof resolved.filename === 'number') { - resolved.filename = resolveString(resolved.filename); - } - if (typeof resolved.funcname === 'number') { - resolved.funcname = resolveString(resolved.funcname); - } - - // Resolve source lines if present - if (Array.isArray(resolved.source)) { - resolved.source = resolved.source.map(index => - typeof index === 'number' ? 
resolveString(index) : index - ); - } - - // Recursively resolve children - if (Array.isArray(resolved.children)) { - resolved.children = resolved.children.map(child => resolveStringIndices(child)); - } - - return resolved; -} - -// Python color palette - cold to hot -const pythonColors = [ - "#fff4bf", // Coldest - light yellow (<1%) - "#ffec9e", // Cold - yellow (1-3%) - "#ffe47d", // Cool - golden yellow (3-6%) - "#ffdc5c", // Medium - golden (6-12%) - "#ffd43b", // Warm - Python gold (12-18%) - "#5592cc", // Hot - light blue (18-35%) - "#4584bb", // Very hot - medium blue (35-60%) - "#3776ab", // Hottest - Python blue (≥60%) -]; - -function ensureLibraryLoaded() { - if (typeof flamegraph === "undefined") { - console.error("d3-flame-graph library not loaded"); - document.getElementById("chart").innerHTML = - '

Error: d3-flame-graph library failed to load

'; - throw new Error("d3-flame-graph library failed to load"); - } -} - -function createPythonTooltip(data) { - const pythonTooltip = flamegraph.tooltip.defaultFlamegraphTooltip(); - pythonTooltip.show = function (d, element) { - if (!this._tooltip) { - this._tooltip = d3 - .select("body") - .append("div") - .attr("class", "python-tooltip") - .style("position", "absolute") - .style("padding", "20px") - .style("background", "white") - .style("color", "#2e3338") - .style("border-radius", "8px") - .style("font-size", "14px") - .style("border", "1px solid #e9ecef") - .style("box-shadow", "0 8px 30px rgba(0, 0, 0, 0.15)") - .style("z-index", "1000") - .style("pointer-events", "none") - .style("font-weight", "400") - .style("line-height", "1.5") - .style("max-width", "500px") - .style("word-wrap", "break-word") - .style("overflow-wrap", "break-word") - .style("font-family", "'Source Sans Pro', sans-serif") - .style("opacity", 0); - } - - const timeMs = (d.data.value / 1000).toFixed(2); - const percentage = ((d.data.value / data.value) * 100).toFixed(2); - const calls = d.data.calls || 0; - const childCount = d.children ? d.children.length : 0; - const source = d.data.source; - - // Create source code section if available - let sourceSection = ""; - if (source && Array.isArray(source) && source.length > 0) { - const sourceLines = source - .map( - (line) => - `
${line - .replace(/&/g, "&") - .replace(//g, ">")}
`, - ) - .join(""); - - sourceSection = ` -
-
- Source Code: -
-
- ${sourceLines} -
-
`; - } else if (source) { - // Show debug info if source exists but isn't an array - sourceSection = ` -
-
- [Debug] - Source data type: ${typeof source} -
-
- ${JSON.stringify(source, null, 2)} -
-
`; - } - - // Resolve strings for display - const funcname = resolveString(d.data.funcname) || resolveString(d.data.name); - const filename = resolveString(d.data.filename) || ""; - - // Don't show file location for special frames like and - const isSpecialFrame = filename === "~"; - const fileLocationHTML = isSpecialFrame ? "" : ` -
- ${filename}${d.data.lineno ? ":" + d.data.lineno : ""} -
`; - - const tooltipHTML = ` -
-
- ${funcname} -
- ${fileLocationHTML} -
- Execution Time: - ${timeMs} ms - - Percentage: - ${percentage}% - - ${calls > 0 ? ` - Function Calls: - ${calls.toLocaleString()} - ` : ''} - - ${childCount > 0 ? ` - Child Functions: - ${childCount} - ` : ''} -
- ${sourceSection} -
- ${childCount > 0 ? - "Click to focus on this function" : - "Leaf function - no children"} -
-
- `; - - // Get mouse position - const event = d3.event || window.event; - const mouseX = event.pageX || event.clientX; - const mouseY = event.pageY || event.clientY; - - // Calculate tooltip dimensions (default to 320px width if not rendered yet) - let tooltipWidth = 320; - let tooltipHeight = 200; - if (this._tooltip && this._tooltip.node()) { - const node = this._tooltip - .style("opacity", 0) - .style("display", "block") - .node(); - tooltipWidth = node.offsetWidth || 320; - tooltipHeight = node.offsetHeight || 200; - this._tooltip.style("display", null); - } - - // Calculate horizontal position: if overflow, show to the left of cursor - const padding = 10; - const rightEdge = mouseX + padding + tooltipWidth; - const viewportWidth = window.innerWidth; - let left; - if (rightEdge > viewportWidth) { - left = mouseX - tooltipWidth - padding; - if (left < 0) left = padding; // prevent off left edge - } else { - left = mouseX + padding; - } - - // Calculate vertical position: if overflow, show above cursor - const bottomEdge = mouseY + padding + tooltipHeight; - const viewportHeight = window.innerHeight; - let top; - if (bottomEdge > viewportHeight) { - top = mouseY - tooltipHeight - padding; - if (top < 0) top = padding; // prevent off top edge - } else { - top = mouseY + padding; - } - - this._tooltip - .html(tooltipHTML) - .style("left", left + "px") - .style("top", top + "px") - .transition() - .duration(200) - .style("opacity", 1); - }; - - // Override the hide method - pythonTooltip.hide = function () { - if (this._tooltip) { - this._tooltip.transition().duration(200).style("opacity", 0); - } - }; - return pythonTooltip; -} - -function createFlamegraph(tooltip, rootValue) { - let chart = flamegraph() - .width(window.innerWidth - 80) - .cellHeight(20) - .transitionDuration(300) - .minFrameSize(1) - .tooltip(tooltip) - .inverted(true) - .setColorMapper(function (d) { - const percentage = d.data.value / rootValue; - let colorIndex; - if (percentage >= 0.6) colorIndex = 7; - else if (percentage >= 0.35) colorIndex = 6; - else if (percentage >= 0.18) colorIndex = 5; - else if (percentage >= 0.12) colorIndex = 4; - else if (percentage >= 0.06) colorIndex = 3; - else if (percentage >= 0.03) colorIndex = 2; - else if (percentage >= 0.01) colorIndex = 1; - else colorIndex = 0; // <1% - return pythonColors[colorIndex]; - }); - return chart; -} - -function renderFlamegraph(chart, data) { - d3.select("#chart").datum(data).call(chart); - window.flamegraphChart = chart; // for controls - window.flamegraphData = data; // for resize/search - populateStats(data); -} - -function attachPanelControls() { - const infoBtn = document.getElementById("show-info-btn"); - const infoPanel = document.getElementById("info-panel"); - const closeBtn = document.getElementById("close-info-btn"); - if (infoBtn && infoPanel) { - infoBtn.addEventListener("click", function () { - const isOpen = infoPanel.style.display === "block"; - infoPanel.style.display = isOpen ? 
"none" : "block"; - }); - } - if (closeBtn && infoPanel) { - closeBtn.addEventListener("click", function () { - infoPanel.style.display = "none"; - }); - } -} - -function updateSearchHighlight(searchTerm, searchInput) { - d3.selectAll("#chart rect") - .style("stroke", null) - .style("stroke-width", null) - .style("opacity", null); - if (searchTerm && searchTerm.length > 0) { - d3.selectAll("#chart rect").style("opacity", 0.3); - let matchCount = 0; - d3.selectAll("#chart rect").each(function (d) { - if (d && d.data) { - const name = resolveString(d.data.name) || ""; - const funcname = resolveString(d.data.funcname) || ""; - const filename = resolveString(d.data.filename) || ""; - const term = searchTerm.toLowerCase(); - const matches = - name.toLowerCase().includes(term) || - funcname.toLowerCase().includes(term) || - filename.toLowerCase().includes(term); - if (matches) { - matchCount++; - d3.select(this) - .style("opacity", 1) - .style("stroke", "#ff6b35") - .style("stroke-width", "2px") - .style("stroke-dasharray", "3,3"); - } - } - }); - if (searchInput) { - if (matchCount > 0) { - searchInput.style.borderColor = "rgba(40, 167, 69, 0.8)"; - searchInput.style.boxShadow = "0 6px 20px rgba(40, 167, 69, 0.2)"; - } else { - searchInput.style.borderColor = "rgba(220, 53, 69, 0.8)"; - searchInput.style.boxShadow = "0 6px 20px rgba(220, 53, 69, 0.2)"; - } - } - } else if (searchInput) { - searchInput.style.borderColor = "rgba(255, 255, 255, 0.2)"; - searchInput.style.boxShadow = "0 4px 12px rgba(0, 0, 0, 0.1)"; - } -} - -function initSearchHandlers() { - const searchInput = document.getElementById("search-input"); - if (!searchInput) return; - let searchTimeout; - function performSearch() { - const term = searchInput.value.trim(); - updateSearchHighlight(term, searchInput); - } - searchInput.addEventListener("input", function () { - clearTimeout(searchTimeout); - searchTimeout = setTimeout(performSearch, 150); - }); - window.performSearch = performSearch; -} - -function handleResize(chart, data) { - window.addEventListener("resize", function () { - if (chart && data) { - const newWidth = window.innerWidth - 80; - chart.width(newWidth); - d3.select("#chart").datum(data).call(chart); - } - }); -} - -function initFlamegraph() { - ensureLibraryLoaded(); - - // Extract string table if present and resolve string indices - let processedData = EMBEDDED_DATA; - if (EMBEDDED_DATA.strings) { - stringTable = EMBEDDED_DATA.strings; - processedData = resolveStringIndices(EMBEDDED_DATA); - } - - // Store original data for filtering - originalData = processedData; - - // Initialize thread filter dropdown - initThreadFilter(processedData); - - const tooltip = createPythonTooltip(processedData); - const chart = createFlamegraph(tooltip, processedData.value); - renderFlamegraph(chart, processedData); - attachPanelControls(); - initSearchHandlers(); - handleResize(chart, processedData); -} - -if (document.readyState === "loading") { - document.addEventListener("DOMContentLoaded", initFlamegraph); -} else { - initFlamegraph(); -} - -function populateStats(data) { - const totalSamples = data.value || 0; - - // Collect all functions with their metrics, aggregated by function name - const functionMap = new Map(); - - function collectFunctions(node) { - if (!node) return; - - let filename = typeof node.filename === 'number' ? resolveString(node.filename) : node.filename; - let funcname = typeof node.funcname === 'number' ? 
resolveString(node.funcname) : node.funcname; - - if (!filename || !funcname) { - const nameStr = typeof node.name === 'number' ? resolveString(node.name) : node.name; - if (nameStr?.includes('(')) { - const match = nameStr.match(/^(.+?)\s*\((.+?):(\d+)\)$/); - if (match) { - funcname = funcname || match[1]; - filename = filename || match[2]; - } - } - } - - filename = filename || 'unknown'; - funcname = funcname || 'unknown'; - - if (filename !== 'unknown' && funcname !== 'unknown' && node.value > 0) { - // Calculate direct samples (this node's value minus children's values) - let childrenValue = 0; - if (node.children) { - childrenValue = node.children.reduce((sum, child) => sum + child.value, 0); - } - const directSamples = Math.max(0, node.value - childrenValue); - - // Use file:line:funcname as key to ensure uniqueness - const funcKey = `${filename}:${node.lineno || '?'}:${funcname}`; - - if (functionMap.has(funcKey)) { - const existing = functionMap.get(funcKey); - existing.directSamples += directSamples; - existing.directPercent = (existing.directSamples / totalSamples) * 100; - // Keep the most representative file/line (the one with more samples) - if (directSamples > existing.maxSingleSamples) { - existing.filename = filename; - existing.lineno = node.lineno || '?'; - existing.maxSingleSamples = directSamples; - } - } else { - functionMap.set(funcKey, { - filename: filename, - lineno: node.lineno || '?', - funcname: funcname, - directSamples, - directPercent: (directSamples / totalSamples) * 100, - maxSingleSamples: directSamples - }); - } - } - - if (node.children) { - node.children.forEach(child => collectFunctions(child)); - } - } - - collectFunctions(data); - - // Convert map to array and get top 3 hotspots - const hotSpots = Array.from(functionMap.values()) - .filter(f => f.directPercent > 0.5) // At least 0.5% to be significant - .sort((a, b) => b.directPercent - a.directPercent) - .slice(0, 3); - - // Populate the 3 cards - for (let i = 0; i < 3; i++) { - const num = i + 1; - if (i < hotSpots.length && hotSpots[i]) { - const hotspot = hotSpots[i]; - const filename = hotspot.filename || 'unknown'; - const lineno = hotspot.lineno ?? '?'; - let funcDisplay = hotspot.funcname || 'unknown'; - if (funcDisplay.length > 35) { - funcDisplay = funcDisplay.substring(0, 32) + '...'; - } - - // Don't show file:line for special frames like and - const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?'); - let fileDisplay; - if (isSpecialFrame) { - fileDisplay = '--'; - } else { - const basename = filename !== 'unknown' ? 
filename.split('/').pop() : 'unknown'; - fileDisplay = `${basename}:${lineno}`; - } - - document.getElementById(`hotspot-file-${num}`).textContent = fileDisplay; - document.getElementById(`hotspot-func-${num}`).textContent = funcDisplay; - document.getElementById(`hotspot-detail-${num}`).textContent = `${hotspot.directPercent.toFixed(1)}% samples (${hotspot.directSamples.toLocaleString()})`; - } else { - document.getElementById(`hotspot-file-${num}`).textContent = '--'; - document.getElementById(`hotspot-func-${num}`).textContent = '--'; - document.getElementById(`hotspot-detail-${num}`).textContent = '--'; - } - } -} - -// Control functions -function resetZoom() { - if (window.flamegraphChart) { - window.flamegraphChart.resetZoom(); - } -} - -function exportSVG() { - const svgElement = document.querySelector("#chart svg"); - if (svgElement) { - const serializer = new XMLSerializer(); - const svgString = serializer.serializeToString(svgElement); - const blob = new Blob([svgString], { type: "image/svg+xml" }); - const url = URL.createObjectURL(blob); - const a = document.createElement("a"); - a.href = url; - a.download = "python-performance-flamegraph.svg"; - a.click(); - URL.revokeObjectURL(url); - } -} - -function toggleLegend() { - const legendPanel = document.getElementById("legend-panel"); - const isHidden = - legendPanel.style.display === "none" || legendPanel.style.display === ""; - legendPanel.style.display = isHidden ? "block" : "none"; -} - -function clearSearch() { - const searchInput = document.getElementById("search-input"); - if (searchInput) { - searchInput.value = ""; - if (window.flamegraphChart) { - window.flamegraphChart.clear(); - } - } -} - -function initThreadFilter(data) { - const threadFilter = document.getElementById('thread-filter'); - const threadWrapper = document.querySelector('.thread-filter-wrapper'); - - if (!threadFilter || !data.threads) { - return; - } - - // Clear existing options except "All Threads" - threadFilter.innerHTML = ''; - - // Add thread options - const threads = data.threads || []; - threads.forEach(threadId => { - const option = document.createElement('option'); - option.value = threadId; - option.textContent = `Thread ${threadId}`; - threadFilter.appendChild(option); - }); - - // Show filter if more than one thread - if (threads.length > 1 && threadWrapper) { - threadWrapper.style.display = 'inline-flex'; - } -} - -function filterByThread() { - const threadFilter = document.getElementById('thread-filter'); - if (!threadFilter || !originalData) return; - - const selectedThread = threadFilter.value; - currentThreadFilter = selectedThread; - - let filteredData; - if (selectedThread === 'all') { - // Show all data - filteredData = originalData; - } else { - // Filter data by thread - const threadId = parseInt(selectedThread); - filteredData = filterDataByThread(originalData, threadId); - - if (filteredData.strings) { - stringTable = filteredData.strings; - filteredData = resolveStringIndices(filteredData); - } - } - - // Re-render flamegraph with filtered data - const tooltip = createPythonTooltip(filteredData); - const chart = createFlamegraph(tooltip, filteredData.value); - renderFlamegraph(chart, filteredData); -} - -function filterDataByThread(data, threadId) { - function filterNode(node) { - if (!node.threads || !node.threads.includes(threadId)) { - return null; - } - - const filteredNode = { - ...node, - children: [] - }; - - if (node.children && Array.isArray(node.children)) { - filteredNode.children = node.children - .map(child => 
filterNode(child)) - .filter(child => child !== null); - } - - return filteredNode; - } - - const filteredRoot = { - ...data, - children: [] - }; - - if (data.children && Array.isArray(data.children)) { - filteredRoot.children = data.children - .map(child => filterNode(child)) - .filter(child => child !== null); - } - - function recalculateValue(node) { - if (!node.children || node.children.length === 0) { - return node.value || 0; - } - const childrenValue = node.children.reduce((sum, child) => sum + recalculateValue(child), 0); - node.value = Math.max(node.value || 0, childrenValue); - return node.value; - } - - recalculateValue(filteredRoot); - - return filteredRoot; -} - diff --git a/Lib/profiling/sampling/flamegraph_template.html b/Lib/profiling/sampling/flamegraph_template.html deleted file mode 100644 index 585a1abb61f..00000000000 --- a/Lib/profiling/sampling/flamegraph_template.html +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - Python Performance Flamegraph - - - - - - - -
-  [deleted template body elided: 153 lines of HTML whose markup did not survive extraction.
-   The page provided the standalone flamegraph UI: a header with the Python logo, the title
-   "Tachyon Profiler Performance Flamegraph" and the subtitle "Interactive visualization of
-   function call performance"; three ranked "Hot Spot" summary cards (#1/#2/#3, populated by
-   populateStats()); toolbar controls and the search input; a "Navigation Guide" (Click: zoom
-   into function; Hover: show detailed information; Width: time spent in function; Height:
-   call stack depth; Color: performance intensity); and a "🔥 Performance Heat Map" legend
-   with eight buckets from Hottest Functions (≥60%, highest performance impact) down to
-   Coldest Functions (<1%, very low performance impact).]
- - - - diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 21c427b7c86..921cd625f04 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -56,7 +56,8 @@ class GeckoCollector(Collector): - def __init__(self, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False): + self.sample_interval_usec = sample_interval_usec self.skip_idle = skip_idle self.start_time = time.time() * 1000 # milliseconds since epoch diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py new file mode 100644 index 00000000000..8a8ba9628df --- /dev/null +++ b/Lib/profiling/sampling/heatmap_collector.py @@ -0,0 +1,944 @@ +"""Heatmap collector for Python profiling with line-level execution heat visualization.""" + +import base64 +import collections +import html +import importlib.resources +import json +import math +import os +import platform +import site +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, List, Tuple + +from ._css_utils import get_combined_css +from .stack_collector import StackTraceCollector + + +# ============================================================================ +# Data Classes +# ============================================================================ + +@dataclass +class FileStats: + """Statistics for a single profiled file.""" + filename: str + module_name: str + module_type: str + total_samples: int + total_self_samples: int + num_lines: int + max_samples: int + max_self_samples: int + percentage: float = 0.0 + + +@dataclass +class TreeNode: + """Node in the hierarchical file tree structure.""" + files: List[FileStats] = field(default_factory=list) + samples: int = 0 + count: int = 0 + children: Dict[str, 'TreeNode'] = field(default_factory=dict) + + +# ============================================================================ +# Module Path Analysis +# ============================================================================ + +def get_python_path_info(): + """Get information about Python installation paths for module extraction. + + Returns: + dict: Dictionary containing stdlib path, site-packages paths, and sys.path entries. + """ + info = { + 'stdlib': None, + 'site_packages': [], + 'sys_path': [] + } + + # Get standard library path from os module location + try: + if hasattr(os, '__file__') and os.__file__: + info['stdlib'] = Path(os.__file__).parent + except (AttributeError, OSError): + pass # Silently continue if we can't determine stdlib path + + # Get site-packages directories + site_packages = [] + try: + site_packages.extend(Path(p) for p in site.getsitepackages()) + except (AttributeError, OSError): + pass # Continue without site packages if unavailable + + # Get user site-packages + try: + user_site = site.getusersitepackages() + if user_site and Path(user_site).exists(): + site_packages.append(Path(user_site)) + except (AttributeError, OSError): + pass # Continue without user site packages + + info['site_packages'] = site_packages + info['sys_path'] = [Path(p) for p in sys.path if p] + + return info + + +def extract_module_name(filename, path_info): + """Extract Python module name and type from file path. 
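    A quick sketch of the classification this function performs. The path and the
    result below are illustrative only; the actual outcome depends on the local
    installation layout detected by get_python_path_info():

        info = get_python_path_info()
        extract_module_name("/usr/lib/python3.14/json/decoder.py", info)
        # -> ("json.decoder", "stdlib") when that directory is the detected stdlib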
+ + Args: + filename: Path to the Python file + path_info: Dictionary from get_python_path_info() + + Returns: + tuple: (module_name, module_type) where module_type is one of: + 'stdlib', 'site-packages', 'project', or 'other' + """ + if not filename: + return ('unknown', 'other') + + try: + file_path = Path(filename) + except (ValueError, OSError): + return (str(filename), 'other') + + # Check if it's in stdlib + if path_info['stdlib'] and _is_subpath(file_path, path_info['stdlib']): + try: + rel_path = file_path.relative_to(path_info['stdlib']) + return (_path_to_module(rel_path), 'stdlib') + except ValueError: + pass + + # Check site-packages + for site_pkg in path_info['site_packages']: + if _is_subpath(file_path, site_pkg): + try: + rel_path = file_path.relative_to(site_pkg) + return (_path_to_module(rel_path), 'site-packages') + except ValueError: + continue + + # Check other sys.path entries (project files) + if not str(file_path).startswith(('<', '[')): # Skip special files + for path_entry in path_info['sys_path']: + if _is_subpath(file_path, path_entry): + try: + rel_path = file_path.relative_to(path_entry) + return (_path_to_module(rel_path), 'project') + except ValueError: + continue + + # Fallback: just use the filename + return (_path_to_module(file_path), 'other') + + +def _is_subpath(file_path, parent_path): + try: + file_path.relative_to(parent_path) + return True + except (ValueError, OSError): + return False + + +def _path_to_module(path): + if isinstance(path, str): + path = Path(path) + + # Remove .py extension + if path.suffix == '.py': + path = path.with_suffix('') + + # Convert path separators to dots + parts = path.parts + + # Handle __init__ files - they represent the package itself + if parts and parts[-1] == '__init__': + parts = parts[:-1] + + return '.'.join(parts) if parts else path.stem + + +# ============================================================================ +# Helper Classes +# ============================================================================ + +class _TemplateLoader: + """Loads and caches HTML/CSS/JS templates for heatmap generation.""" + + def __init__(self): + """Load all templates and assets once.""" + self.index_template = None + self.file_template = None + self.index_css = None + self.index_js = None + self.file_css = None + self.file_js = None + self.logo_html = None + + self._load_templates() + + def _load_templates(self): + """Load all template files from _heatmap_assets.""" + try: + template_dir = importlib.resources.files(__package__) + assets_dir = template_dir / "_heatmap_assets" + + # Load HTML templates + self.index_template = (assets_dir / "heatmap_index_template.html").read_text(encoding="utf-8") + self.file_template = (assets_dir / "heatmap_pyfile_template.html").read_text(encoding="utf-8") + + # Load CSS (same file used for both index and file pages) + css_content = get_combined_css("heatmap") + self.index_css = css_content + self.file_css = css_content + + # Load JS + shared_js = (assets_dir / "heatmap_shared.js").read_text(encoding="utf-8") + self.index_js = f"{shared_js}\n{(assets_dir / 'heatmap_index.js').read_text(encoding='utf-8')}" + self.file_js = f"{shared_js}\n{(assets_dir / 'heatmap.js').read_text(encoding='utf-8')}" + + # Load Python logo + logo_dir = template_dir / "_assets" + try: + png_path = logo_dir / "python-logo-only.png" + b64_logo = base64.b64encode(png_path.read_bytes()).decode("ascii") + self.logo_html = f'' + except (FileNotFoundError, IOError) as e: + self.logo_html = '
' + print(f"Warning: Could not load Python logo: {e}") + + except (FileNotFoundError, IOError) as e: + raise RuntimeError(f"Failed to load heatmap template files: {e}") from e + + +class _TreeBuilder: + """Builds hierarchical tree structure from file statistics.""" + + @staticmethod + def build_file_tree(file_stats: List[FileStats]) -> Dict[str, TreeNode]: + """Build hierarchical tree grouped by module type, then by module structure. + + Args: + file_stats: List of FileStats objects + + Returns: + Dictionary mapping module types to their tree roots + """ + # Group by module type first + type_groups = {'stdlib': [], 'site-packages': [], 'project': [], 'other': []} + for stat in file_stats: + type_groups[stat.module_type].append(stat) + + # Build tree for each type + trees = {} + for module_type, stats in type_groups.items(): + if not stats: + continue + + root_node = TreeNode() + + for stat in stats: + module_name = stat.module_name + parts = module_name.split('.') + + # Navigate/create tree structure + current_node = root_node + for i, part in enumerate(parts): + if i == len(parts) - 1: + # Last part - store the file + current_node.files.append(stat) + else: + # Intermediate part - create or navigate + if part not in current_node.children: + current_node.children[part] = TreeNode() + current_node = current_node.children[part] + + # Calculate aggregate stats for this type's tree + _TreeBuilder._calculate_node_stats(root_node) + trees[module_type] = root_node + + return trees + + @staticmethod + def _calculate_node_stats(node: TreeNode) -> Tuple[int, int]: + """Recursively calculate aggregate statistics for tree nodes. + + Args: + node: TreeNode to calculate stats for + + Returns: + Tuple of (total_samples, file_count) + """ + total_samples = 0 + file_count = 0 + + # Count files at this level + for file_stat in node.files: + total_samples += file_stat.total_samples + file_count += 1 + + # Recursively process children + for child in node.children.values(): + child_samples, child_count = _TreeBuilder._calculate_node_stats(child) + total_samples += child_samples + file_count += child_count + + node.samples = total_samples + node.count = file_count + return total_samples, file_count + + +class _HtmlRenderer: + """Renders hierarchical tree structures as HTML.""" + + def __init__(self, file_index: Dict[str, str]): + """Initialize renderer with file index. + + Args: + file_index: Mapping from filenames to HTML file names + """ + self.file_index = file_index + self.heatmap_bar_height = 16 + + def render_hierarchical_html(self, trees: Dict[str, TreeNode]) -> str: + """Build hierarchical HTML with type sections and collapsible module folders. + + Args: + trees: Dictionary mapping module types to tree roots + + Returns: + Complete HTML string for all sections + """ + type_names = { + 'stdlib': '📚 Standard Library', + 'site-packages': '📦 Site Packages', + 'project': '🏗️ Project Files', + 'other': '📄 Other Files' + } + + sections = [] + for module_type in ['project', 'stdlib', 'site-packages', 'other']: + if module_type not in trees: + continue + + tree = trees[module_type] + + # Project starts expanded, others start collapsed + is_collapsed = module_type in {'stdlib', 'site-packages', 'other'} + icon = '▶' if is_collapsed else '▼' + content_style = ' style="display: none;"' if is_collapsed else '' + + section_html = f''' +
+
+ {icon} + {type_names[module_type]} + ({tree.count} files, {tree.samples:,} samples) +
+
+''' + + # Render root folders + root_folders = sorted(tree.children.items(), + key=lambda x: x[1].samples, reverse=True) + + for folder_name, folder_node in root_folders: + section_html += self._render_folder(folder_node, folder_name, level=1) + + # Render root files (files not in any module) + if tree.files: + sorted_files = sorted(tree.files, key=lambda x: x.total_samples, reverse=True) + section_html += '
\n' + for stat in sorted_files: + section_html += self._render_file_item(stat, indent=' ') + section_html += '
\n' + + section_html += '
\n
\n' + sections.append(section_html) + + return '\n'.join(sections) + + def _render_folder(self, node: TreeNode, name: str, level: int = 1) -> str: + """Render a single folder node recursively. + + Args: + node: TreeNode to render + name: Display name for the folder + level: Nesting level for indentation + + Returns: + HTML string for this folder and its contents + """ + indent = ' ' * level + parts = [] + + # Render folder header (collapsed by default) + parts.append(f'{indent}') + + return '\n'.join(parts) + + def _render_file_item(self, stat: FileStats, indent: str = '') -> str: + """Render a single file item with heatmap bar. + + Args: + stat: FileStats object + indent: Indentation string + + Returns: + HTML string for file item + """ + full_path = html.escape(stat.filename) + module_name = html.escape(stat.module_name) + + intensity = stat.percentage / 100.0 + bar_width = min(stat.percentage, 100) + + html_file = self.file_index[stat.filename] + + return (f'{indent}
\n' + f'{indent} 📄 {module_name}\n' + f'{indent} {stat.total_samples:,} samples\n' + f'{indent}
\n' + f'{indent}
\n') + + +# ============================================================================ +# Main Collector Class +# ============================================================================ + +class HeatmapCollector(StackTraceCollector): + """Collector that generates coverage.py-style heatmap HTML output with line intensity. + + This collector creates detailed HTML reports showing which lines of code + were executed most frequently during profiling, similar to coverage.py + but showing execution "heat" rather than just coverage. + """ + + # File naming and formatting constants + FILE_INDEX_FORMAT = "file_{:04d}.html" + + def __init__(self, *args, **kwargs): + """Initialize the heatmap collector with data structures for analysis.""" + super().__init__(*args, **kwargs) + + # Sample counting data structures + self.line_samples = collections.Counter() + self.file_samples = collections.defaultdict(collections.Counter) + self.line_self_samples = collections.Counter() + self.file_self_samples = collections.defaultdict(collections.Counter) + + # Call graph data structures for navigation + self.call_graph = collections.defaultdict(list) + self.callers_graph = collections.defaultdict(list) + self.function_definitions = {} + + # Edge counting for call path analysis + self.edge_samples = collections.Counter() + + # Statistics and metadata + self._total_samples = 0 + self._path_info = get_python_path_info() + self.stats = {} + + # Template loader (loads all templates once) + self._template_loader = _TemplateLoader() + + # File index (populated during export) + self.file_index = {} + + def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs): + """Set profiling statistics to include in heatmap output. + + Args: + sample_interval_usec: Sampling interval in microseconds + duration_sec: Total profiling duration in seconds + sample_rate: Effective sampling rate + error_rate: Optional error rate during profiling + missed_samples: Optional percentage of missed samples + **kwargs: Additional statistics to include + """ + self.stats = { + "sample_interval_usec": sample_interval_usec, + "duration_sec": duration_sec, + "sample_rate": sample_rate, + "error_rate": error_rate, + "missed_samples": missed_samples, + "python_version": sys.version, + "python_implementation": platform.python_implementation(), + "platform": platform.platform(), + } + self.stats.update(kwargs) + + def process_frames(self, frames, thread_id): + """Process stack frames and count samples per line. 
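        The counting scheme can be pictured with a toy two-frame stack
        (names are illustrative):

            # sample: ("a.py", 10, "f") called from ("b.py", 20, "g");
            # frames[0] is the leaf, so this one sample does:
            #   line_samples[("a.py", 10)] += 1        # cumulative
            #   line_samples[("b.py", 20)] += 1        # cumulative
            #   line_self_samples[("a.py", 10)] += 1   # self: leaf frame only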
+ + Args: + frames: List of frame tuples (filename, lineno, funcname) + frames[0] is the leaf (top of stack, where execution is) + thread_id: Thread ID for this stack trace + """ + self._total_samples += 1 + + # Count each line in the stack and build call graph + for i, frame_info in enumerate(frames): + filename, lineno, funcname = frame_info + + if not self._is_valid_frame(filename, lineno): + continue + + # frames[0] is the leaf - where execution is actually happening + is_leaf = (i == 0) + self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf) + + # Build call graph for adjacent frames + if i + 1 < len(frames): + self._record_call_relationship(frames[i], frames[i + 1]) + + def _is_valid_frame(self, filename, lineno): + """Check if a frame should be included in the heatmap.""" + # Skip internal or invalid files + if not filename or filename.startswith('<') or filename.startswith('['): + return False + + # Skip invalid frames with corrupted filename data + if filename == "__init__" and lineno == 0: + return False + + return True + + def _record_line_sample(self, filename, lineno, funcname, is_leaf=False): + """Record a sample for a specific line.""" + # Track cumulative samples (all occurrences in stack) + self.line_samples[(filename, lineno)] += 1 + self.file_samples[filename][lineno] += 1 + + # Track self/leaf samples (only when at top of stack) + if is_leaf: + self.line_self_samples[(filename, lineno)] += 1 + self.file_self_samples[filename][lineno] += 1 + + # Record function definition location + if funcname and (filename, funcname) not in self.function_definitions: + self.function_definitions[(filename, funcname)] = lineno + + def _record_call_relationship(self, callee_frame, caller_frame): + """Record caller/callee relationship between adjacent frames.""" + callee_filename, callee_lineno, callee_funcname = callee_frame + caller_filename, caller_lineno, caller_funcname = caller_frame + + # Skip internal files for call graph + if callee_filename.startswith('<') or callee_filename.startswith('['): + return + + # Get the callee's function definition line + callee_def_line = self.function_definitions.get( + (callee_filename, callee_funcname), callee_lineno + ) + + # Record caller -> callee relationship + caller_key = (caller_filename, caller_lineno) + callee_info = (callee_filename, callee_def_line, callee_funcname) + if callee_info not in self.call_graph[caller_key]: + self.call_graph[caller_key].append(callee_info) + + # Record callee <- caller relationship + callee_key = (callee_filename, callee_def_line) + caller_info = (caller_filename, caller_lineno, caller_funcname) + if caller_info not in self.callers_graph[callee_key]: + self.callers_graph[callee_key].append(caller_info) + + # Count this call edge for path analysis + edge_key = (caller_key, callee_key) + self.edge_samples[edge_key] += 1 + + def export(self, output_path): + """Export heatmap data as HTML files in a directory. 
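        The on-disk layout is flat, with per-file pages named via
        FILE_INDEX_FORMAT (directory name and file count here are hypothetical):

            heatmap_out/
                index.html       # module tree plus summary statistics
                file_0000.html   # hottest source file (stats sorted descending)
                file_0001.html   # next file, and so on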
+ + Args: + output_path: Path where to create the heatmap output directory + """ + if not self.file_samples: + print("Warning: No heatmap data to export") + return + + try: + output_dir = self._prepare_output_directory(output_path) + file_stats = self._calculate_file_stats() + self._create_file_index(file_stats) + + # Generate individual file reports + self._generate_file_reports(output_dir, file_stats) + + # Generate index page + self._generate_index_html(output_dir / 'index.html', file_stats) + + self._print_export_summary(output_dir, file_stats) + + except Exception as e: + print(f"Error: Failed to export heatmap: {e}") + raise + + def _prepare_output_directory(self, output_path): + """Create output directory for heatmap files.""" + output_dir = Path(output_path) + if output_dir.suffix == '.html': + output_dir = output_dir.with_suffix('') + + try: + output_dir.mkdir(exist_ok=True, parents=True) + except (IOError, OSError) as e: + raise RuntimeError(f"Failed to create output directory {output_dir}: {e}") from e + + return output_dir + + def _create_file_index(self, file_stats: List[FileStats]): + """Create mapping from filenames to HTML file names.""" + self.file_index = { + stat.filename: self.FILE_INDEX_FORMAT.format(i) + for i, stat in enumerate(file_stats) + } + + def _generate_file_reports(self, output_dir, file_stats: List[FileStats]): + """Generate HTML report for each source file.""" + for stat in file_stats: + file_path = output_dir / self.file_index[stat.filename] + line_counts = self.file_samples[stat.filename] + valid_line_counts = {line: count for line, count in line_counts.items() if line >= 0} + + self_counts = self.file_self_samples.get(stat.filename, {}) + valid_self_counts = {line: count for line, count in self_counts.items() if line >= 0} + + self._generate_file_html( + file_path, + stat.filename, + valid_line_counts, + valid_self_counts, + stat + ) + + def _print_export_summary(self, output_dir, file_stats: List[FileStats]): + """Print summary of exported heatmap.""" + print(f"Heatmap output written to {output_dir}/") + print(f" - Index: {output_dir / 'index.html'}") + print(f" - {len(file_stats)} source file(s) analyzed") + + def _calculate_file_stats(self) -> List[FileStats]: + """Calculate statistics for each file. 
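        Percentages are normalized against the hottest file rather than the
        grand total, so the top file always reads 100% (toy numbers):

            # total_samples per file, sorted descending: [900, 450, 90]
            # percentage = total_samples / max_total * 100 -> [100.0, 50.0, 10.0]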
+ + Returns: + List of FileStats objects sorted by total samples + """ + file_stats = [] + for filename, line_counts in self.file_samples.items(): + # Skip special frames + if filename in ('~', '...', '.') or filename.startswith('<') or filename.startswith('['): + continue + + # Filter out lines with -1 (special frames) + valid_line_counts = {line: count for line, count in line_counts.items() if line >= 0} + if not valid_line_counts: + continue + + # Get self samples for this file + self_line_counts = self.file_self_samples.get(filename, {}) + valid_self_counts = {line: count for line, count in self_line_counts.items() if line >= 0} + + total_samples = sum(valid_line_counts.values()) + total_self_samples = sum(valid_self_counts.values()) + num_lines = len(valid_line_counts) + max_samples = max(valid_line_counts.values()) + max_self_samples = max(valid_self_counts.values()) if valid_self_counts else 0 + module_name, module_type = extract_module_name(filename, self._path_info) + + file_stats.append(FileStats( + filename=filename, + module_name=module_name, + module_type=module_type, + total_samples=total_samples, + total_self_samples=total_self_samples, + num_lines=num_lines, + max_samples=max_samples, + max_self_samples=max_self_samples, + percentage=0.0 + )) + + # Sort by total samples and calculate percentages + file_stats.sort(key=lambda x: x.total_samples, reverse=True) + if file_stats: + max_total = file_stats[0].total_samples + for stat in file_stats: + stat.percentage = (stat.total_samples / max_total * 100) if max_total > 0 else 0 + + return file_stats + + def _generate_index_html(self, index_path: Path, file_stats: List[FileStats]): + """Generate index.html with list of all profiled files.""" + # Build hierarchical tree + tree = _TreeBuilder.build_file_tree(file_stats) + + # Render tree as HTML + renderer = _HtmlRenderer(self.file_index) + sections_html = renderer.render_hierarchical_html(tree) + + # Format error rate and missed samples with bar classes + error_rate = self.stats.get('error_rate') + if error_rate is not None: + error_rate_str = f"{error_rate:.1f}%" + error_rate_width = min(error_rate, 100) + # Determine bar color class based on rate + if error_rate < 5: + error_rate_class = "good" + elif error_rate < 15: + error_rate_class = "warning" + else: + error_rate_class = "error" + else: + error_rate_str = "N/A" + error_rate_width = 0 + error_rate_class = "good" + + missed_samples = self.stats.get('missed_samples') + if missed_samples is not None: + missed_samples_str = f"{missed_samples:.1f}%" + missed_samples_width = min(missed_samples, 100) + if missed_samples < 5: + missed_samples_class = "good" + elif missed_samples < 15: + missed_samples_class = "warning" + else: + missed_samples_class = "error" + else: + missed_samples_str = "N/A" + missed_samples_width = 0 + missed_samples_class = "good" + + # Populate template + replacements = { + "": f"", + "": f"", + "": self._template_loader.logo_html, + "": str(len(file_stats)), + "": f"{self._total_samples:,}", + "": f"{self.stats.get('duration_sec', 0):.1f}s", + "": f"{self.stats.get('sample_rate', 0):.1f}", + "": error_rate_str, + "": str(error_rate_width), + "": error_rate_class, + "": missed_samples_str, + "": str(missed_samples_width), + "": missed_samples_class, + "": sections_html, + } + + html_content = self._template_loader.index_template + for placeholder, value in replacements.items(): + html_content = html_content.replace(placeholder, value) + + try: + index_path.write_text(html_content, encoding='utf-8') + except 
(IOError, OSError) as e: + raise RuntimeError(f"Failed to write index file {index_path}: {e}") from e + + def _generate_file_html(self, output_path: Path, filename: str, + line_counts: Dict[int, int], self_counts: Dict[int, int], + file_stat: FileStats): + """Generate HTML for a single source file with heatmap coloring.""" + # Read source file + try: + source_lines = Path(filename).read_text(encoding='utf-8', errors='replace').splitlines() + except (IOError, OSError) as e: + if not (filename.startswith('<') or filename.startswith('[') or + filename in ('~', '...', '.') or len(filename) < 2): + print(f"Warning: Could not read source file {filename}: {e}") + source_lines = [f"# Source file not available: {filename}"] + + # Generate HTML for each line + max_samples = max(line_counts.values()) if line_counts else 1 + max_self_samples = max(self_counts.values()) if self_counts else 1 + code_lines_html = [ + self._build_line_html(line_num, line_content, line_counts, self_counts, + max_samples, max_self_samples, filename) + for line_num, line_content in enumerate(source_lines, start=1) + ] + + # Populate template + replacements = { + "": html.escape(filename), + "": f"{file_stat.total_samples:,}", + "": f"{file_stat.total_self_samples:,}", + "": str(file_stat.num_lines), + "": f"{file_stat.percentage:.2f}", + "": str(file_stat.max_samples), + "": str(file_stat.max_self_samples), + "": ''.join(code_lines_html), + "": f"", + "": f"", + } + + html_content = self._template_loader.file_template + for placeholder, value in replacements.items(): + html_content = html_content.replace(placeholder, value) + + try: + output_path.write_text(html_content, encoding='utf-8') + except (IOError, OSError) as e: + raise RuntimeError(f"Failed to write file {output_path}: {e}") from e + + def _build_line_html(self, line_num: int, line_content: str, + line_counts: Dict[int, int], self_counts: Dict[int, int], + max_samples: int, max_self_samples: int, filename: str) -> str: + """Build HTML for a single line of source code.""" + cumulative_samples = line_counts.get(line_num, 0) + self_samples = self_counts.get(line_num, 0) + + # Calculate colors for both self and cumulative modes + if cumulative_samples > 0: + log_cumulative = math.log(cumulative_samples + 1) + log_max = math.log(max_samples + 1) + cumulative_intensity = log_cumulative / log_max if log_max > 0 else 0 + + if self_samples > 0 and max_self_samples > 0: + log_self = math.log(self_samples + 1) + log_max_self = math.log(max_self_samples + 1) + self_intensity = log_self / log_max_self if log_max_self > 0 else 0 + else: + self_intensity = 0 + + self_display = f"{self_samples:,}" if self_samples > 0 else "" + cumulative_display = f"{cumulative_samples:,}" + tooltip = f"Self: {self_samples:,}, Total: {cumulative_samples:,}" + else: + cumulative_intensity = 0 + self_intensity = 0 + self_display = "" + cumulative_display = "" + tooltip = "" + + # Get navigation buttons + nav_buttons_html = self._build_navigation_buttons(filename, line_num) + + # Build line HTML with intensity data attributes + line_html = html.escape(line_content.rstrip('\n')) + title_attr = f' title="{html.escape(tooltip)}"' if tooltip else "" + + return ( + f'
\n' + f'
{line_num}
\n' + f'
{self_display}
\n' + f'
{cumulative_display}
\n' + f'
{line_html}
\n' + f' {nav_buttons_html}\n' + f'
\n' + ) + + def _build_navigation_buttons(self, filename: str, line_num: int) -> str: + """Build navigation buttons for callers/callees.""" + line_key = (filename, line_num) + caller_list = self._deduplicate_by_function(self.callers_graph.get(line_key, [])) + callee_list = self._deduplicate_by_function(self.call_graph.get(line_key, [])) + + # Get edge counts for each caller/callee + callers_with_counts = self._get_edge_counts(line_key, caller_list, is_caller=True) + callees_with_counts = self._get_edge_counts(line_key, callee_list, is_caller=False) + + # Build navigation buttons with counts + caller_btn = self._create_navigation_button(callers_with_counts, 'caller', '▲') + callee_btn = self._create_navigation_button(callees_with_counts, 'callee', '▼') + + if caller_btn or callee_btn: + return f'
{caller_btn}{callee_btn}
' + return '' + + def _get_edge_counts(self, line_key: Tuple[str, int], + items: List[Tuple[str, int, str]], + is_caller: bool) -> List[Tuple[str, int, str, int]]: + """Get sample counts for each caller/callee edge.""" + result = [] + for file, line, func in items: + edge_line_key = (file, line) + if is_caller: + edge_key = (edge_line_key, line_key) + else: + edge_key = (line_key, edge_line_key) + + count = self.edge_samples.get(edge_key, 0) + result.append((file, line, func, count)) + + result.sort(key=lambda x: x[3], reverse=True) + return result + + def _deduplicate_by_function(self, items: List[Tuple[str, int, str]]) -> List[Tuple[str, int, str]]: + """Remove duplicate entries based on (file, function) key.""" + seen = {} + result = [] + for file, line, func in items: + key = (file, func) + if key not in seen: + seen[key] = True + result.append((file, line, func)) + return result + + def _create_navigation_button(self, items_with_counts: List[Tuple[str, int, str, int]], + btn_class: str, arrow: str) -> str: + """Create HTML for a navigation button with sample counts.""" + # Filter valid items + valid_items = [(f, l, fn, cnt) for f, l, fn, cnt in items_with_counts + if f in self.file_index and l > 0] + if not valid_items: + return "" + + if len(valid_items) == 1: + file, line, func, count = valid_items[0] + target_html = self.file_index[file] + nav_data = json.dumps({'link': f"{target_html}#line-{line}", 'func': func}) + title = f"Go to {btn_class}: {html.escape(func)} ({count:,} samples)" + return f'' + + # Multiple items - create menu + total_samples = sum(cnt for _, _, _, cnt in valid_items) + items_data = [ + { + 'file': os.path.basename(file), + 'func': func, + 'count': count, + 'link': f"{self.file_index[file]}#line-{line}" + } + for file, line, func, count in valid_items + ] + items_json = html.escape(json.dumps(items_data)) + title = f"{len(items_data)} {btn_class}s ({total_samples:,} samples)" + return f'' diff --git a/Lib/profiling/sampling/live_collector/__init__.py b/Lib/profiling/sampling/live_collector/__init__.py new file mode 100644 index 00000000000..175e4610d23 --- /dev/null +++ b/Lib/profiling/sampling/live_collector/__init__.py @@ -0,0 +1,200 @@ +"""Live profiling collector that displays top-like statistics using curses. 
+ + ┌─────────────────────────────┐ + │ Target Python Process │ + │ (being profiled) │ + └──────────────┬──────────────┘ + │ Stack sampling at + │ configured interval + │ (e.g., 10000µs) + ▼ + ┌─────────────────────────────┐ + │ LiveStatsCollector │ + │ ┌───────────────────────┐ │ + │ │ collect() │ │ Aggregates samples + │ │ - Iterates frames │ │ into statistics + │ │ - Updates counters │ │ + │ └───────────┬───────────┘ │ + │ │ │ + │ ▼ │ + │ ┌───────────────────────┐ │ + │ │ Data Storage │ │ + │ │ - result dict │ │ Tracks per-function: + │ │ - direct_calls │ │ • Direct samples + │ │ - cumulative_calls │ │ • Cumulative samples + │ └───────────┬───────────┘ │ • Derived time stats + │ │ │ + │ ▼ │ + │ ┌───────────────────────┐ │ + │ │ Display Update │ │ + │ │ (10Hz by default) │ │ Rate-limited refresh + │ └───────────┬───────────┘ │ + └──────────────┼──────────────┘ + │ + ▼ + ┌─────────────────────────────┐ + │ DisplayInterface │ + │ (Abstract layer) │ + └──────────────┬──────────────┘ + ┌───────┴────────┐ + │ │ + ┌──────────▼────────┐ ┌───▼──────────┐ + │ CursesDisplay │ │ MockDisplay │ + │ - Real terminal │ │ - Testing │ + │ - ncurses backend │ │ - No UI │ + └─────────┬─────────┘ └──────────────┘ + │ + ▼ + ┌─────────────────────────────────────┐ + │ Widget-Based Rendering │ + │ ┌─────────────────────────────────┐ │ + │ │ HeaderWidget │ │ + │ │ • PID, uptime, time, interval │ │ + │ │ • Sample stats & progress bar │ │ + │ │ • Efficiency bar │ │ + │ │ • Thread status & GC stats │ │ + │ │ • Function summary │ │ + │ │ • Top 3 hottest functions │ │ + │ ├─────────────────────────────────┤ │ + │ │ TableWidget │ │ + │ │ • Column headers (sortable) │ │ Interactive display + │ │ • Stats rows (scrolling) │ │ with keyboard controls: + │ │ - nsamples % time │ │ s: sort, p: pause + │ │ - function file:line │ │ r: reset, /: filter + │ ├─────────────────────────────────┤ │ q: quit, h: help + │ │ FooterWidget │ │ + │ │ • Legend and status │ │ + │ │ • Filter input prompt │ │ + │ └─────────────────────────────────┘ │ + └─────────────────────────────────────┘ + +Architecture: + +The live collector is organized into four layers. The data collection layer +(LiveStatsCollector) aggregates stack samples into per-function statistics without +any knowledge of how they will be presented. The display abstraction layer +(DisplayInterface) defines rendering operations without coupling to curses or any +specific UI framework. The widget layer (Widget, HeaderWidget, TableWidget, +FooterWidget, HelpWidget, ProgressBarWidget) encapsulates individual UI components +with their own rendering logic, promoting modularity and reusability. The +presentation layer (CursesDisplay/MockDisplay) implements the actual rendering for +terminal output and testing. + +The system runs two independent update loops. The sampling loop is driven by the +profiler at the configured interval (e.g., 10000µs) and continuously collects +stack frames and updates statistics. The display loop runs at a fixed refresh rate +(default 10Hz) and updates the terminal independently of sampling frequency. This +separation allows high-frequency sampling without overwhelming the terminal with +constant redraws. + +Statistics are computed incrementally as samples arrive. The collector maintains +running counters (direct calls and cumulative calls) in a dictionary keyed by +function location. 
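Concretely, the aggregation state is nothing more than nested counters keyed by
(filename, lineno, funcname); an illustrative entry (other bookkeeping keys
omitted) looks like:

    result[("app.py", 42, "work")]
    # -> {"direct_calls": 3, "cumulative_calls": 17}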
Derived metrics like time estimates and percentages are computed +on-demand during display updates rather than being stored, which minimizes memory +overhead as the number of tracked functions grows. + +User input is processed asynchronously during display updates using non-blocking I/O. +This allows interactive controls (sorting, filtering, pausing) without interrupting +the data collection pipeline. The collector maintains mode flags (paused, +filter_input_mode) that affect what gets displayed but not what gets collected. + +""" + +# Re-export all public classes and constants for backward compatibility +from .collector import LiveStatsCollector +from .display import DisplayInterface, CursesDisplay, MockDisplay +from .widgets import ( + Widget, + ProgressBarWidget, + HeaderWidget, + TableWidget, + FooterWidget, + HelpWidget, +) +from .constants import ( + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_HZ, + DISPLAY_UPDATE_INTERVAL, + MIN_TERMINAL_WIDTH, + MIN_TERMINAL_HEIGHT, + WIDTH_THRESHOLD_SAMPLE_PCT, + WIDTH_THRESHOLD_TOTTIME, + WIDTH_THRESHOLD_CUMUL_PCT, + WIDTH_THRESHOLD_CUMTIME, + HEADER_LINES, + FOOTER_LINES, + SAFETY_MARGIN, + TOP_FUNCTIONS_DISPLAY_COUNT, + COL_WIDTH_NSAMPLES, + COL_SPACING, + COL_WIDTH_SAMPLE_PCT, + COL_WIDTH_TIME, + MIN_FUNC_NAME_WIDTH, + MAX_FUNC_NAME_WIDTH, + MIN_AVAILABLE_SPACE, + MIN_BAR_WIDTH, + MAX_SAMPLE_RATE_BAR_WIDTH, + MAX_EFFICIENCY_BAR_WIDTH, + MIN_SAMPLE_RATE_FOR_SCALING, + FINISHED_BANNER_EXTRA_LINES, + COLOR_PAIR_HEADER_BG, + COLOR_PAIR_CYAN, + COLOR_PAIR_YELLOW, + COLOR_PAIR_GREEN, + COLOR_PAIR_MAGENTA, + COLOR_PAIR_RED, + COLOR_PAIR_SORTED_HEADER, + DEFAULT_SORT_BY, + DEFAULT_DISPLAY_LIMIT, +) + +__all__ = [ + # Main collector + "LiveStatsCollector", + # Display interfaces + "DisplayInterface", + "CursesDisplay", + "MockDisplay", + # Widgets + "Widget", + "ProgressBarWidget", + "HeaderWidget", + "TableWidget", + "FooterWidget", + "HelpWidget", + # Constants + "MICROSECONDS_PER_SECOND", + "DISPLAY_UPDATE_HZ", + "DISPLAY_UPDATE_INTERVAL", + "MIN_TERMINAL_WIDTH", + "MIN_TERMINAL_HEIGHT", + "WIDTH_THRESHOLD_SAMPLE_PCT", + "WIDTH_THRESHOLD_TOTTIME", + "WIDTH_THRESHOLD_CUMUL_PCT", + "WIDTH_THRESHOLD_CUMTIME", + "HEADER_LINES", + "FOOTER_LINES", + "SAFETY_MARGIN", + "TOP_FUNCTIONS_DISPLAY_COUNT", + "COL_WIDTH_NSAMPLES", + "COL_SPACING", + "COL_WIDTH_SAMPLE_PCT", + "COL_WIDTH_TIME", + "MIN_FUNC_NAME_WIDTH", + "MAX_FUNC_NAME_WIDTH", + "MIN_AVAILABLE_SPACE", + "MIN_BAR_WIDTH", + "MAX_SAMPLE_RATE_BAR_WIDTH", + "MAX_EFFICIENCY_BAR_WIDTH", + "MIN_SAMPLE_RATE_FOR_SCALING", + "FINISHED_BANNER_EXTRA_LINES", + "COLOR_PAIR_HEADER_BG", + "COLOR_PAIR_CYAN", + "COLOR_PAIR_YELLOW", + "COLOR_PAIR_GREEN", + "COLOR_PAIR_MAGENTA", + "COLOR_PAIR_RED", + "COLOR_PAIR_SORTED_HEADER", + "DEFAULT_SORT_BY", + "DEFAULT_DISPLAY_LIMIT", +] diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py new file mode 100644 index 00000000000..5edb02e6e88 --- /dev/null +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -0,0 +1,993 @@ +"""LiveStatsCollector - Main collector class for live profiling.""" + +import collections +import contextlib +import curses +from dataclasses import dataclass, field +import os +import site +import sys +import sysconfig +import time +import _colorize + +from ..collector import Collector +from ..constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_WALL, +) +from 
.constants import ( + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_INTERVAL, + MIN_TERMINAL_WIDTH, + MIN_TERMINAL_HEIGHT, + HEADER_LINES, + FOOTER_LINES, + SAFETY_MARGIN, + FINISHED_BANNER_EXTRA_LINES, + DEFAULT_SORT_BY, + DEFAULT_DISPLAY_LIMIT, + COLOR_PAIR_HEADER_BG, + COLOR_PAIR_CYAN, + COLOR_PAIR_YELLOW, + COLOR_PAIR_GREEN, + COLOR_PAIR_MAGENTA, + COLOR_PAIR_RED, + COLOR_PAIR_SORTED_HEADER, +) +from .display import CursesDisplay +from .widgets import HeaderWidget, TableWidget, FooterWidget, HelpWidget +from .trend_tracker import TrendTracker + + +@dataclass +class ThreadData: + """Encapsulates all profiling data for a single thread.""" + + thread_id: int + + # Function call statistics: {location: {direct_calls: int, cumulative_calls: int}} + result: dict = field(default_factory=lambda: collections.defaultdict( + lambda: dict(direct_calls=0, cumulative_calls=0) + )) + + # Thread status statistics + has_gil: int = 0 + on_cpu: int = 0 + gil_requested: int = 0 + unknown: int = 0 + total: int = 0 # Total status samples for this thread + + # Sample counts + sample_count: int = 0 + gc_frame_samples: int = 0 + + def increment_status_flag(self, status_flags): + """Update status counts based on status bit flags.""" + if status_flags & THREAD_STATUS_HAS_GIL: + self.has_gil += 1 + if status_flags & THREAD_STATUS_ON_CPU: + self.on_cpu += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + self.gil_requested += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + self.unknown += 1 + self.total += 1 + + def as_status_dict(self): + """Return status counts as a dict for compatibility.""" + return { + "has_gil": self.has_gil, + "on_cpu": self.on_cpu, + "gil_requested": self.gil_requested, + "unknown": self.unknown, + "total": self.total, + } + + +class LiveStatsCollector(Collector): + """Collector that displays live top-like statistics using ncurses.""" + + def __init__( + self, + sample_interval_usec, + *, + skip_idle=False, + sort_by=DEFAULT_SORT_BY, + limit=DEFAULT_DISPLAY_LIMIT, + pid=None, + display=None, + mode=None, + async_aware=None, + ): + """ + Initialize the live stats collector. + + Args: + sample_interval_usec: Sampling interval in microseconds + skip_idle: Whether to skip idle threads + sort_by: Sort key ('tottime', 'nsamples', 'cumtime', 'sample_pct', 'cumul_pct') + limit: Maximum number of functions to display + pid: Process ID being profiled + display: DisplayInterface implementation (None means curses will be used) + mode: Profiling mode ('cpu', 'gil', etc.) 
- affects what stats are shown + async_aware: Async tracing mode - None (sync only), "all" or "running" + """ + self.result = collections.defaultdict( + lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) + ) + self.sample_interval_usec = sample_interval_usec + self.sample_interval_sec = ( + sample_interval_usec / MICROSECONDS_PER_SECOND + ) + self.skip_idle = skip_idle + self.sort_by = sort_by + self.limit = limit + self.total_samples = 0 + self.start_time = None + self.stdscr = None + self.display = display # DisplayInterface implementation + self.running = True + self.pid = pid + self.mode = mode # Profiling mode + self.async_aware = async_aware # Async tracing mode + # Pre-select frame iterator method to avoid per-call dispatch overhead + self._get_frame_iterator = self._get_async_frame_iterator if async_aware else self._get_sync_frame_iterator + self._saved_stdout = None + self._saved_stderr = None + self._devnull = None + self._last_display_update = None + self.max_sample_rate = 0 # Track maximum sample rate seen + self.successful_samples = 0 # Track samples that captured frames + self.failed_samples = 0 # Track samples that failed to capture frames + self.display_update_interval = DISPLAY_UPDATE_INTERVAL # Instance variable for display refresh rate + + # Thread status statistics (bit flags) + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, # Total thread count across all samples + } + self.gc_frame_samples = 0 # Track samples with GC frames + + # Interactive controls state + self.paused = False # Pause UI updates (profiling continues) + self.show_help = False # Show help screen + self.filter_pattern = None # Glob pattern to filter functions + self.filter_input_mode = False # Currently entering filter text + self.filter_input_buffer = "" # Buffer for filter input + self.finished = False # Program has finished, showing final state + self.finish_timestamp = None # When profiling finished (for time freezing) + self.finish_wall_time = None # Wall clock time when profiling finished + + # Thread tracking state + self.thread_ids = [] # List of thread IDs seen + self.view_mode = "ALL" # "ALL" or "PER_THREAD" + self.current_thread_index = ( + 0 # Index into thread_ids when in PER_THREAD mode + ) + self.per_thread_data = {} # {thread_id: ThreadData} + + # Calculate common path prefixes to strip + self._path_prefixes = self._get_common_path_prefixes() + + # Widgets (initialized when display is available) + self.header_widget = None + self.table_widget = None + self.footer_widget = None + self.help_widget = None + + # Color mode + self._can_colorize = _colorize.can_colorize() + + # Trend tracking (initialized after colors are set up) + self._trend_tracker = None + + @property + def elapsed_time(self): + """Get the elapsed time, frozen when finished.""" + if self.finished and self.finish_timestamp is not None: + return self.finish_timestamp - self.start_time + return time.perf_counter() - self.start_time if self.start_time else 0 + + @property + def current_time_display(self): + """Get the current time for display, frozen when finished.""" + if self.finished and self.finish_wall_time is not None: + return time.strftime("%H:%M:%S", time.localtime(self.finish_wall_time)) + return time.strftime("%H:%M:%S") + + def _get_or_create_thread_data(self, thread_id): + """Get or create ThreadData for a thread ID.""" + if thread_id not in self.per_thread_data: + self.per_thread_data[thread_id] = ThreadData(thread_id=thread_id) + 
return self.per_thread_data[thread_id] + + def _get_current_thread_data(self): + """Get ThreadData for currently selected thread in PER_THREAD mode.""" + if self.view_mode == "PER_THREAD" and self.current_thread_index < len(self.thread_ids): + thread_id = self.thread_ids[self.current_thread_index] + return self.per_thread_data.get(thread_id) + return None + + def _get_current_result_source(self): + """Get result dict for current view mode (aggregated or per-thread).""" + if self.view_mode == "ALL": + return self.result + thread_data = self._get_current_thread_data() + return thread_data.result if thread_data else {} + + def _get_common_path_prefixes(self): + """Get common path prefixes to strip from file paths.""" + prefixes = [] + + # Get the actual stdlib location from the os module + # This works for both installed Python and development builds + os_module_file = os.__file__ + if os_module_file: + # os.__file__ points to os.py, get its directory + stdlib_dir = os.path.dirname(os.path.abspath(os_module_file)) + prefixes.append(stdlib_dir) + + # Get stdlib location from sysconfig (may be different or same) + stdlib_path = sysconfig.get_path("stdlib") + if stdlib_path: + prefixes.append(stdlib_path) + + # Get platstdlib location (platform-specific stdlib) + platstdlib_path = sysconfig.get_path("platstdlib") + if platstdlib_path: + prefixes.append(platstdlib_path) + + # Get site-packages locations + for site_path in site.getsitepackages(): + prefixes.append(site_path) + + # Also check user site-packages + user_site = site.getusersitepackages() + if user_site: + prefixes.append(user_site) + + # Remove duplicates and sort by length (longest first) to match most specific paths first + prefixes = list(set(prefixes)) + prefixes.sort(key=lambda x: len(x), reverse=True) + + return prefixes + + def simplify_path(self, filepath): + """Simplify a file path by removing common prefixes.""" + # Try to match against known prefixes + for prefix_path in self._path_prefixes: + if filepath.startswith(prefix_path): + # Remove the prefix completely + relative = filepath[len(prefix_path) :].lstrip(os.sep) + return relative + + # If no match, return the original path + return filepath + + def process_frames(self, frames, thread_id=None): + """Process a single thread's frame stack. 
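        Each sample updates the global aggregate and, when a thread_id is
        supplied, the matching ThreadData in parallel (toy stack, names
        illustrative):

            # frames = [("a.py", 10, "f"), ("b.py", 20, "g")], thread_id=123
            # every location gets result[loc]["cumulative_calls"] += 1,
            # only the top frame gets result[top]["direct_calls"] += 1,
            # and per_thread_data[123].result mirrors the same increments.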
+ + Args: + frames: List of frame information + thread_id: Thread ID for per-thread tracking (optional) + """ + if not frames: + return + + # Get per-thread data if tracking per-thread + thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None + + # Process each frame in the stack to track cumulative calls + for frame in frames: + location = (frame.filename, frame.lineno, frame.funcname) + self.result[location]["cumulative_calls"] += 1 + if thread_data: + thread_data.result[location]["cumulative_calls"] += 1 + + # The top frame gets counted as an inline call (directly executing) + top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname) + self.result[top_location]["direct_calls"] += 1 + if thread_data: + thread_data.result[top_location]["direct_calls"] += 1 + + def _get_sync_frame_iterator(self, stack_frames): + """Iterator for sync frames.""" + return self._iter_all_frames(stack_frames, skip_idle=self.skip_idle) + + def _get_async_frame_iterator(self, stack_frames): + """Iterator for async frames, yielding (frames, thread_id) tuples.""" + for frames, thread_id, task_id in self._iter_async_frames(stack_frames): + yield frames, thread_id + + def collect_failed_sample(self): + self.failed_samples += 1 + self.total_samples += 1 + + def collect(self, stack_frames): + """Collect and display profiling data.""" + if self.start_time is None: + self.start_time = time.perf_counter() + self._last_display_update = self.start_time + + has_gc_frame = False + + # Collect thread status stats (only available in sync mode) + if not self.async_aware: + status_counts, sample_has_gc, per_thread_stats = self._collect_thread_status_stats(stack_frames) + for key, count in status_counts.items(): + self.thread_status_counts[key] += count + if sample_has_gc: + has_gc_frame = True + + for thread_id, stats in per_thread_stats.items(): + thread_data = self._get_or_create_thread_data(thread_id) + thread_data.has_gil += stats.get("has_gil", 0) + thread_data.on_cpu += stats.get("on_cpu", 0) + thread_data.gil_requested += stats.get("gil_requested", 0) + thread_data.unknown += stats.get("unknown", 0) + thread_data.total += stats.get("total", 0) + if stats.get("gc_samples", 0): + thread_data.gc_frame_samples += stats["gc_samples"] + + # Process frames using pre-selected iterator + for frames, thread_id in self._get_frame_iterator(stack_frames): + if not frames: + continue + + self.process_frames(frames, thread_id=thread_id) + + # Track thread IDs + if thread_id is not None and thread_id not in self.thread_ids: + self.thread_ids.append(thread_id) + + if thread_id is not None: + thread_data = self._get_or_create_thread_data(thread_id) + thread_data.sample_count += 1 + + if has_gc_frame: + self.gc_frame_samples += 1 + + self.successful_samples += 1 + self.total_samples += 1 + + # Handle input on every sample for instant responsiveness + if self.display is not None: + self._handle_input() + + # Update display at configured rate if display is initialized and not paused + if self.display is not None and not self.paused: + current_time = time.perf_counter() + if ( + self._last_display_update is None + or (current_time - self._last_display_update) + >= self.display_update_interval + ): + self._update_display() + self._last_display_update = current_time + + def _prepare_display_data(self, height): + """Prepare data for display rendering.""" + elapsed = self.elapsed_time + stats_list = self.build_stats_list() + + # Calculate available space for stats + # Add extra lines for finished 
banner when in finished state + extra_header_lines = ( + FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + ) + max_stats_lines = max( + 0, + height + - HEADER_LINES + - extra_header_lines + - FOOTER_LINES + - SAFETY_MARGIN, + ) + stats_list = stats_list[:max_stats_lines] + + return elapsed, stats_list + + def _initialize_widgets(self, colors): + """Initialize widgets with display and colors.""" + if self.header_widget is None: + # Initialize trend tracker with colors + if self._trend_tracker is None: + self._trend_tracker = TrendTracker(colors, enabled=True) + + self.header_widget = HeaderWidget(self.display, colors, self) + self.table_widget = TableWidget(self.display, colors, self) + self.footer_widget = FooterWidget(self.display, colors, self) + self.help_widget = HelpWidget(self.display, colors) + + def _render_display_sections( + self, height, width, elapsed, stats_list, colors + ): + """Render all display sections to the screen.""" + line = 0 + try: + # Initialize widgets if not already done + self._initialize_widgets(colors) + + # Render header + line = self.header_widget.render( + line, width, elapsed=elapsed, stats_list=stats_list + ) + + # Render table + line = self.table_widget.render( + line, width, height=height, stats_list=stats_list + ) + + except curses.error: + pass + + def _update_display(self): + """Update the display with current stats.""" + try: + # Clear screen and get dimensions + self.display.clear() + height, width = self.display.get_dimensions() + + # Check terminal size + if width < MIN_TERMINAL_WIDTH or height < MIN_TERMINAL_HEIGHT: + self._show_terminal_too_small(height, width) + self.display.refresh() + return + + # Setup colors and initialize widgets (needed for both help and normal display) + colors = self._setup_colors() + self._initialize_widgets(colors) + + # Show help screen if requested + if self.show_help: + self.help_widget.render(0, width, height=height) + self.display.refresh() + return + + # Prepare data + elapsed, stats_list = self._prepare_display_data(height) + + # Render all sections + self._render_display_sections( + height, width, elapsed, stats_list, colors + ) + + # Footer + self.footer_widget.render(height - 2, width) + + # Show filter input prompt if in filter input mode + if self.filter_input_mode: + self.footer_widget.render_filter_input_prompt( + height - 1, width + ) + + # Refresh display + self.display.redraw() + self.display.refresh() + + except Exception: + pass + + def _cycle_sort(self, reverse=False): + """Cycle through different sort modes in column order. 
+ + Args: + reverse: If True, cycle backwards (right to left), otherwise forward (left to right) + """ + sort_modes = [ + "nsamples", + "sample_pct", + "tottime", + "cumul_pct", + "cumtime", + ] + try: + current_idx = sort_modes.index(self.sort_by) + if reverse: + self.sort_by = sort_modes[(current_idx - 1) % len(sort_modes)] + else: + self.sort_by = sort_modes[(current_idx + 1) % len(sort_modes)] + except ValueError: + self.sort_by = "nsamples" + + def _setup_colors(self): + """Set up color pairs and return color attributes.""" + + A_BOLD = self.display.get_attr("A_BOLD") + A_REVERSE = self.display.get_attr("A_REVERSE") + A_UNDERLINE = self.display.get_attr("A_UNDERLINE") + A_NORMAL = self.display.get_attr("A_NORMAL") + + # Check both curses color support and _colorize.can_colorize() + if self.display.has_colors() and self._can_colorize: + with contextlib.suppress(Exception): + # Color constants (using curses values for compatibility) + COLOR_CYAN = 6 + COLOR_GREEN = 2 + COLOR_YELLOW = 3 + COLOR_BLACK = 0 + COLOR_MAGENTA = 5 + COLOR_RED = 1 + + # Initialize all color pairs used throughout the UI + self.display.init_color_pair( + 1, COLOR_CYAN, -1 + ) # Data colors for stats rows + self.display.init_color_pair(2, COLOR_GREEN, -1) + self.display.init_color_pair(3, COLOR_YELLOW, -1) + self.display.init_color_pair( + COLOR_PAIR_HEADER_BG, COLOR_BLACK, COLOR_GREEN + ) + self.display.init_color_pair( + COLOR_PAIR_CYAN, COLOR_CYAN, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_YELLOW, COLOR_YELLOW, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_GREEN, COLOR_GREEN, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_MAGENTA, COLOR_MAGENTA, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_RED, COLOR_RED, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_SORTED_HEADER, COLOR_BLACK, COLOR_YELLOW + ) + + return { + "header": self.display.get_color_pair(COLOR_PAIR_HEADER_BG) + | A_BOLD, + "cyan": self.display.get_color_pair(COLOR_PAIR_CYAN) + | A_BOLD, + "yellow": self.display.get_color_pair(COLOR_PAIR_YELLOW) + | A_BOLD, + "green": self.display.get_color_pair(COLOR_PAIR_GREEN) + | A_BOLD, + "magenta": self.display.get_color_pair(COLOR_PAIR_MAGENTA) + | A_BOLD, + "red": self.display.get_color_pair(COLOR_PAIR_RED) + | A_BOLD, + "sorted_header": self.display.get_color_pair( + COLOR_PAIR_SORTED_HEADER + ) + | A_BOLD, + "normal_header": A_REVERSE | A_BOLD, + "color_samples": self.display.get_color_pair(1), + "color_file": self.display.get_color_pair(2), + "color_func": self.display.get_color_pair(3), + # Trend colors (stock-like indicators) + "trend_up": self.display.get_color_pair(COLOR_PAIR_GREEN) | A_BOLD, + "trend_down": self.display.get_color_pair(COLOR_PAIR_RED) | A_BOLD, + "trend_stable": A_NORMAL, + } + + # Fallback to non-color attributes + return { + "header": A_REVERSE | A_BOLD, + "cyan": A_BOLD, + "yellow": A_BOLD, + "green": A_BOLD, + "magenta": A_BOLD, + "red": A_BOLD, + "sorted_header": A_REVERSE | A_BOLD | A_UNDERLINE, + "normal_header": A_REVERSE | A_BOLD, + "color_samples": A_NORMAL, + "color_file": A_NORMAL, + "color_func": A_NORMAL, + # Trend colors (fallback to bold/normal for monochrome) + "trend_up": A_BOLD, + "trend_down": A_BOLD, + "trend_stable": A_NORMAL, + } + + def build_stats_list(self): + """Build and sort the statistics list.""" + stats_list = [] + result_source = self._get_current_result_source() + + for func, call_counts in result_source.items(): + # Apply filter if set (using substring matching) + if 
self.filter_pattern: + filename, lineno, funcname = func + # Simple substring match (case-insensitive) + pattern_lower = self.filter_pattern.lower() + filename_lower = filename.lower() + funcname_lower = funcname.lower() + + # Match if pattern is substring of filename, funcname, or combined + matched = ( + pattern_lower in filename_lower + or pattern_lower in funcname_lower + or pattern_lower in f"{filename_lower}:{funcname_lower}" + ) + if not matched: + continue + + direct_calls = call_counts.get("direct_calls", 0) + cumulative_calls = call_counts.get("cumulative_calls", 0) + total_time = direct_calls * self.sample_interval_sec + cumulative_time = cumulative_calls * self.sample_interval_sec + + # Calculate sample percentages + sample_pct = (direct_calls / self.total_samples * 100) if self.total_samples > 0 else 0 + cumul_pct = (cumulative_calls / self.total_samples * 100) if self.total_samples > 0 else 0 + + # Calculate trends for all columns using TrendTracker + trends = {} + if self._trend_tracker is not None: + trends = self._trend_tracker.update_metrics( + func, + { + 'nsamples': direct_calls, + 'tottime': total_time, + 'cumtime': cumulative_time, + 'sample_pct': sample_pct, + 'cumul_pct': cumul_pct, + } + ) + + stats_list.append( + { + "func": func, + "direct_calls": direct_calls, + "cumulative_calls": cumulative_calls, + "total_time": total_time, + "cumulative_time": cumulative_time, + "trends": trends, # Dictionary of trends for all columns + } + ) + + # Sort the stats + if self.sort_by == "nsamples": + stats_list.sort(key=lambda x: x["direct_calls"], reverse=True) + elif self.sort_by == "tottime": + stats_list.sort(key=lambda x: x["total_time"], reverse=True) + elif self.sort_by == "cumtime": + stats_list.sort(key=lambda x: x["cumulative_time"], reverse=True) + elif self.sort_by == "sample_pct": + stats_list.sort( + key=lambda x: (x["direct_calls"] / self.total_samples * 100) + if self.total_samples > 0 + else 0, + reverse=True, + ) + elif self.sort_by == "cumul_pct": + stats_list.sort( + key=lambda x: ( + x["cumulative_calls"] / self.total_samples * 100 + ) + if self.total_samples > 0 + else 0, + reverse=True, + ) + + return stats_list + + def reset_stats(self): + """Reset all collected statistics.""" + self.result.clear() + self.per_thread_data.clear() + self.thread_ids.clear() + self.view_mode = "ALL" + self.current_thread_index = 0 + self.total_samples = 0 + self.successful_samples = 0 + self.failed_samples = 0 + self.max_sample_rate = 0 + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + self.gc_frame_samples = 0 + # Clear trend tracking + if self._trend_tracker is not None: + self._trend_tracker.clear() + # Reset finished state and finish timestamp + self.finished = False + self.finish_timestamp = None + self.finish_wall_time = None + self.start_time = time.perf_counter() + self._last_display_update = self.start_time + + def mark_finished(self): + """Mark the profiling session as finished.""" + self.finished = True + # Capture the finish timestamp to freeze all timing displays + self.finish_timestamp = time.perf_counter() + self.finish_wall_time = time.time() # Wall clock time for display + # Force a final display update to show the finished message + if self.display is not None: + self._update_display() + + def _handle_finished_input_update(self, had_input): + """Update display after input when program is finished.""" + if self.finished and had_input and self.display is not None: + self._update_display() 
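+
+    # A worked example of how the sampled counts above turn into the
+    # displayed columns (a sketch with hypothetical numbers, assuming a
+    # 100 µs sampling interval; build_stats_list() above applies this
+    # same arithmetic per function):
+    #
+    #   sample_interval_sec = 0.0001             # hypothetical 100 µs
+    #   direct_calls, total_samples = 2_500, 10_000
+    #   tottime    = direct_calls * sample_interval_sec   # 0.25 s
+    #   sample_pct = direct_calls / total_samples * 100   # 25.0 %
+    #
+    # The cumtime and cumul% columns use cumulative_calls (function
+    # anywhere on the stack) in place of direct_calls.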
+
+    def _show_terminal_too_small(self, height, width):
+        """Display a message when terminal is too small."""
+        A_BOLD = self.display.get_attr("A_BOLD")
+        msg1 = "Terminal too small!"
+        msg2 = f"Need: {MIN_TERMINAL_WIDTH}x{MIN_TERMINAL_HEIGHT}"
+        msg3 = f"Have: {width}x{height}"
+        msg4 = "Please resize"
+
+        # Center the messages
+        if height >= 4:
+            self.display.add_str(
+                height // 2 - 2,
+                max(0, (width - len(msg1)) // 2),
+                msg1[: width - 1],
+                A_BOLD,
+            )
+            self.display.add_str(
+                height // 2 - 1,
+                max(0, (width - len(msg2)) // 2),
+                msg2[: width - 1],
+            )
+            self.display.add_str(
+                height // 2,
+                max(0, (width - len(msg3)) // 2),
+                msg3[: width - 1],
+            )
+            self.display.add_str(
+                height // 2 + 1,
+                max(0, (width - len(msg4)) // 2),
+                msg4[: width - 1],
+            )
+        elif height >= 1:
+            self.display.add_str(0, 0, msg1[: width - 1], A_BOLD)
+
+    def _show_terminal_size_warning_and_wait(self, height, width):
+        """Show terminal size warning during initialization and wait for user acknowledgment."""
+        A_BOLD = self.display.get_attr("A_BOLD")
+        A_DIM = self.display.get_attr("A_DIM")
+
+        self.display.clear()
+        msg1 = "WARNING: Terminal too small!"
+        msg2 = f"Required: {MIN_TERMINAL_WIDTH}x{MIN_TERMINAL_HEIGHT}"
+        msg3 = f"Current: {width}x{height}"
+        msg4 = "Please resize your terminal for best experience"
+        msg5 = "Press any key to continue..."
+
+        # Center the messages
+        if height >= 5:
+            self.display.add_str(
+                height // 2 - 2,
+                max(0, (width - len(msg1)) // 2),
+                msg1[: width - 1],
+                A_BOLD,
+            )
+            self.display.add_str(
+                height // 2 - 1,
+                max(0, (width - len(msg2)) // 2),
+                msg2[: width - 1],
+            )
+            self.display.add_str(
+                height // 2,
+                max(0, (width - len(msg3)) // 2),
+                msg3[: width - 1],
+            )
+            self.display.add_str(
+                height // 2 + 1,
+                max(0, (width - len(msg4)) // 2),
+                msg4[: width - 1],
+            )
+            self.display.add_str(
+                height // 2 + 3,
+                max(0, (width - len(msg5)) // 2),
+                msg5[: width - 1],
+                A_DIM,
+            )
+        elif height >= 1:
+            self.display.add_str(0, 0, msg1[: width - 1], A_BOLD)
+
+        self.display.refresh()
+        # Block until the user acknowledges the warning. DisplayInterface
+        # does not expose a read timeout (timeouts are curses-specific),
+        # so this waits indefinitely for any key press.
+        self.display.set_nodelay(False)
+        self.display.get_input()
+        self.display.set_nodelay(True)
+
+    def _handle_input(self):
+        """Handle keyboard input (non-blocking)."""
+        from . 
import constants + + self.display.set_nodelay(True) + ch = self.display.get_input() + + # Handle filter input mode FIRST - takes precedence over all commands + if self.filter_input_mode: + if ch == 27: # ESC key + self.filter_input_mode = False + self.filter_input_buffer = "" + elif ch == 10 or ch == 13: # Enter key + self.filter_pattern = ( + self.filter_input_buffer + if self.filter_input_buffer + else None + ) + self.filter_input_mode = False + self.filter_input_buffer = "" + elif ch == 127 or ch == 263: # Backspace + if self.filter_input_buffer: + self.filter_input_buffer = self.filter_input_buffer[:-1] + elif ch >= 32 and ch < 127: # Printable characters + self.filter_input_buffer += chr(ch) + + # Update display if input was processed while finished + self._handle_finished_input_update(ch != -1) + return + + # Handle help toggle keys + if ch == ord("h") or ch == ord("H") or ch == ord("?"): + self.show_help = not self.show_help + return + + # If showing help, any other key closes it + if self.show_help and ch != -1: + self.show_help = False + return + + # Handle regular commands + if ch == ord("q") or ch == ord("Q"): + self.running = False + + elif ch == ord("s"): + self._cycle_sort(reverse=False) + + elif ch == ord("S"): + self._cycle_sort(reverse=True) + + elif ch == ord("p") or ch == ord("P"): + self.paused = not self.paused + + elif ch == ord("r") or ch == ord("R"): + # Don't allow reset when profiling is finished + if not self.finished: + self.reset_stats() + + elif ch == ord("+") or ch == ord("="): + # Decrease update interval (faster refresh) + self.display_update_interval = max( + 0.05, self.display_update_interval - 0.05 + ) # Min 20Hz + + elif ch == ord("-") or ch == ord("_"): + # Increase update interval (slower refresh) + self.display_update_interval = min( + 1.0, self.display_update_interval + 0.05 + ) # Max 1Hz + + elif ch == ord("c") or ch == ord("C"): + if self.filter_pattern: + self.filter_pattern = None + + elif ch == ord("/"): + self.filter_input_mode = True + self.filter_input_buffer = self.filter_pattern or "" + + elif ch == ord("t") or ch == ord("T"): + # Toggle between ALL and PER_THREAD modes + if self.view_mode == "ALL": + if len(self.thread_ids) > 0: + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.view_mode = "ALL" + + elif ch == ord("x") or ch == ord("X"): + # Toggle trend colors on/off + if self._trend_tracker is not None: + self._trend_tracker.toggle() + + elif ch == curses.KEY_LEFT or ch == curses.KEY_UP: + # Navigate to previous thread in PER_THREAD mode, or switch from ALL to PER_THREAD + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.current_thread_index = ( + self.current_thread_index - 1 + ) % len(self.thread_ids) + + elif ch == curses.KEY_RIGHT or ch == curses.KEY_DOWN: + # Navigate to next thread in PER_THREAD mode, or switch from ALL to PER_THREAD + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.current_thread_index = ( + self.current_thread_index + 1 + ) % len(self.thread_ids) + + # Update display if input was processed while finished + self._handle_finished_input_update(ch != -1) + + def init_curses(self, stdscr): + """Initialize curses display and suppress stdout/stderr.""" + self.stdscr = stdscr + self.display = CursesDisplay(stdscr) + + # Check terminal size upfront and warn if too small + height, width = self.display.get_dimensions() 
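+
+        # This size check runs only once, at startup; after initialization,
+        # _update_display() re-reads the dimensions on every refresh and
+        # falls back to _show_terminal_too_small() if the terminal shrinks.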
+        if width < MIN_TERMINAL_WIDTH or height < MIN_TERMINAL_HEIGHT:
+            # Show warning and wait for the user to acknowledge it
+            self._show_terminal_size_warning_and_wait(height, width)
+
+        curses.curs_set(0)  # Hide cursor
+        stdscr.nodelay(True)  # Non-blocking input
+        stdscr.scrollok(False)  # Disable scrolling
+        stdscr.idlok(False)  # Disable hardware insert/delete
+        stdscr.leaveok(True)  # Don't care about cursor position
+
+        if curses.has_colors():
+            curses.start_color()
+            curses.use_default_colors()
+
+        # Suppress stdout and stderr so stray writes cannot corrupt the
+        # curses display. The streams are swapped manually (rather than
+        # with contextlib.redirect_stdout/stderr) because they must stay
+        # redirected across method calls until cleanup_curses() runs.
+        self._saved_stdout = sys.stdout
+        self._saved_stderr = sys.stderr
+        # Open devnull and ensure it's cleaned up even if an exception occurs
+        try:
+            self._devnull = open(os.devnull, "w")
+            sys.stdout = self._devnull
+            sys.stderr = self._devnull
+        except Exception:
+            # If redirection fails, restore original streams
+            sys.stdout = self._saved_stdout
+            sys.stderr = self._saved_stderr
+            raise
+
+        # Initial clear
+        self.display.clear()
+        self.display.refresh()
+
+    def cleanup_curses(self):
+        """Clean up curses display and restore stdout/stderr."""
+        # Restore the original stdout and stderr
+        # Use try-finally to ensure cleanup even if restoration fails
+        try:
+            if self._saved_stdout is not None:
+                sys.stdout = self._saved_stdout
+                self._saved_stdout = None
+            if self._saved_stderr is not None:
+                sys.stderr = self._saved_stderr
+                self._saved_stderr = None
+        finally:
+            # Always close devnull, even if stdout/stderr restoration fails
+            if self._devnull is not None:
+                with contextlib.suppress(Exception):
+                    self._devnull.close()
+                self._devnull = None
+
+        if self.display is not None and self.stdscr is not None:
+            with contextlib.suppress(Exception):
+                curses.curs_set(1)  # Show cursor
+                self.display.set_nodelay(False)
+
+    def export(self, filename):
+        """Export is not supported in live mode."""
+        raise NotImplementedError(
+            "Export to file is not supported in live mode. "
+            "Use the live TUI to view statistics in real-time."
+ ) diff --git a/Lib/profiling/sampling/live_collector/constants.py b/Lib/profiling/sampling/live_collector/constants.py new file mode 100644 index 00000000000..e4690c90baf --- /dev/null +++ b/Lib/profiling/sampling/live_collector/constants.py @@ -0,0 +1,59 @@ +"""Constants for the live profiling collector.""" + +# Time conversion constants +MICROSECONDS_PER_SECOND = 1_000_000 + +# Display update constants +DISPLAY_UPDATE_HZ = 10 +DISPLAY_UPDATE_INTERVAL = 1.0 / DISPLAY_UPDATE_HZ # 0.1 seconds + +# Terminal size constraints +MIN_TERMINAL_WIDTH = 60 +MIN_TERMINAL_HEIGHT = 12 + +# Column width thresholds +WIDTH_THRESHOLD_SAMPLE_PCT = 80 +WIDTH_THRESHOLD_TOTTIME = 100 +WIDTH_THRESHOLD_CUMUL_PCT = 120 +WIDTH_THRESHOLD_CUMTIME = 140 + +# Display layout constants +HEADER_LINES = 10 # Increased to include thread status line +FOOTER_LINES = 2 +SAFETY_MARGIN = 1 +TOP_FUNCTIONS_DISPLAY_COUNT = 3 + +# Column widths for data display +COL_WIDTH_NSAMPLES = 13 +COL_SPACING = 2 +COL_WIDTH_SAMPLE_PCT = 5 +COL_WIDTH_TIME = 10 + +# Function name display +MIN_FUNC_NAME_WIDTH = 10 +MAX_FUNC_NAME_WIDTH = 40 +MIN_AVAILABLE_SPACE = 10 + +# Progress bar display +MIN_BAR_WIDTH = 10 +MAX_SAMPLE_RATE_BAR_WIDTH = 30 +MAX_EFFICIENCY_BAR_WIDTH = 60 + +# Sample rate scaling +MIN_SAMPLE_RATE_FOR_SCALING = 100 + +# Finished banner display +FINISHED_BANNER_EXTRA_LINES = 3 # Blank line + banner + blank line + +# Color pair IDs +COLOR_PAIR_HEADER_BG = 4 +COLOR_PAIR_CYAN = 5 +COLOR_PAIR_YELLOW = 6 +COLOR_PAIR_GREEN = 7 +COLOR_PAIR_MAGENTA = 8 +COLOR_PAIR_RED = 9 +COLOR_PAIR_SORTED_HEADER = 10 + +# Default display settings +DEFAULT_SORT_BY = "nsamples" # Number of samples in leaf (self time) +DEFAULT_DISPLAY_LIMIT = 20 diff --git a/Lib/profiling/sampling/live_collector/display.py b/Lib/profiling/sampling/live_collector/display.py new file mode 100644 index 00000000000..d7f65ad73fd --- /dev/null +++ b/Lib/profiling/sampling/live_collector/display.py @@ -0,0 +1,236 @@ +"""Display interface abstractions for the live profiling collector.""" + +import contextlib +import curses +from abc import ABC, abstractmethod + + +class DisplayInterface(ABC): + """Abstract interface for display operations to enable testing.""" + + @abstractmethod + def get_dimensions(self): + """Get terminal dimensions as (height, width).""" + pass + + @abstractmethod + def clear(self): + """Clear the screen.""" + pass + + @abstractmethod + def refresh(self): + """Refresh the screen to show changes.""" + pass + + @abstractmethod + def redraw(self): + """Redraw the entire window.""" + pass + + @abstractmethod + def add_str(self, line, col, text, attr=0): + """Add a string at the specified position.""" + pass + + @abstractmethod + def get_input(self): + """Get a character from input (non-blocking). 
Returns -1 if no input.""" + pass + + @abstractmethod + def set_nodelay(self, flag): + """Set non-blocking mode for input.""" + pass + + @abstractmethod + def has_colors(self): + """Check if terminal supports colors.""" + pass + + @abstractmethod + def init_color_pair(self, pair_id, fg, bg): + """Initialize a color pair.""" + pass + + @abstractmethod + def get_color_pair(self, pair_id): + """Get a color pair attribute.""" + pass + + @abstractmethod + def get_attr(self, name): + """Get a display attribute by name (e.g., 'A_BOLD', 'A_REVERSE').""" + pass + + +class CursesDisplay(DisplayInterface): + """Real curses display implementation.""" + + def __init__(self, stdscr): + self.stdscr = stdscr + + def get_dimensions(self): + return self.stdscr.getmaxyx() + + def clear(self): + self.stdscr.clear() + + def refresh(self): + self.stdscr.refresh() + + def redraw(self): + self.stdscr.redrawwin() + + def add_str(self, line, col, text, attr=0): + try: + height, width = self.get_dimensions() + if 0 <= line < height and 0 <= col < width: + max_len = width - col - 1 + if len(text) > max_len: + text = text[:max_len] + self.stdscr.addstr(line, col, text, attr) + except curses.error: + pass + + def get_input(self): + try: + return self.stdscr.getch() + except (KeyError, curses.error): + return -1 + + def set_nodelay(self, flag): + self.stdscr.nodelay(flag) + + def has_colors(self): + return curses.has_colors() + + def init_color_pair(self, pair_id, fg, bg): + try: + curses.init_pair(pair_id, fg, bg) + except curses.error: + pass + + def get_color_pair(self, pair_id): + return curses.color_pair(pair_id) + + def get_attr(self, name): + return getattr(curses, name, 0) + + +class MockDisplay(DisplayInterface): + """Mock display for testing.""" + + def __init__(self, height=40, width=160): + self.height = height + self.width = width + self.buffer = {} + self.cleared = False + self.refreshed = False + self.redrawn = False + self.input_queue = [] + self.nodelay_flag = True + self.colors_supported = True + self.color_pairs = {} + + def get_dimensions(self): + return (self.height, self.width) + + def clear(self): + self.buffer.clear() + self.cleared = True + + def refresh(self): + self.refreshed = True + + def redraw(self): + self.redrawn = True + + def add_str(self, line, col, text, attr=0): + if 0 <= line < self.height and 0 <= col < self.width: + max_len = self.width - col - 1 + if len(text) > max_len: + text = text[:max_len] + self.buffer[(line, col)] = (text, attr) + + def get_input(self): + if self.input_queue: + return self.input_queue.pop(0) + return -1 + + def set_nodelay(self, flag): + self.nodelay_flag = flag + + def has_colors(self): + return self.colors_supported + + def init_color_pair(self, pair_id, fg, bg): + self.color_pairs[pair_id] = (fg, bg) + + def get_color_pair(self, pair_id): + return pair_id << 8 + + def get_attr(self, name): + attrs = { + "A_NORMAL": 0, + "A_BOLD": 1 << 16, + "A_REVERSE": 1 << 17, + "A_UNDERLINE": 1 << 18, + "A_DIM": 1 << 19, + } + return attrs.get(name, 0) + + def simulate_input(self, char): + """Helper method for tests to simulate keyboard input.""" + self.input_queue.append(char) + + def get_text_at(self, line, col): + """Helper method for tests to inspect buffer content.""" + if (line, col) in self.buffer: + return self.buffer[(line, col)][0] + return None + + def get_all_lines(self): + """Get all display content as a list of lines (for testing).""" + if not self.buffer: + return [] + + max_line = max(pos[0] for pos in self.buffer.keys()) + lines = [] + for line_num 
in range(max_line + 1): + line_parts = [] + for col in range(self.width): + if (line_num, col) in self.buffer: + text, _ = self.buffer[(line_num, col)] + line_parts.append((col, text)) + + # Reconstruct line from parts + if line_parts: + line_parts.sort(key=lambda x: x[0]) + line = "" + last_col = 0 + for col, text in line_parts: + if col > last_col: + line += " " * (col - last_col) + line += text + last_col = col + len(text) + lines.append(line.rstrip()) + else: + lines.append("") + + # Remove trailing empty lines + while lines and not lines[-1]: + lines.pop() + + return lines + + def find_text(self, pattern): + """Find text matching pattern in buffer (for testing). Returns (line, col) or None.""" + for (line, col), (text, _) in self.buffer.items(): + if pattern in text: + return (line, col) + return None + + def contains_text(self, text): + """Check if display contains the given text anywhere (for testing).""" + return self.find_text(text) is not None diff --git a/Lib/profiling/sampling/live_collector/trend_tracker.py b/Lib/profiling/sampling/live_collector/trend_tracker.py new file mode 100644 index 00000000000..c025b83a134 --- /dev/null +++ b/Lib/profiling/sampling/live_collector/trend_tracker.py @@ -0,0 +1,157 @@ +"""TrendTracker - Encapsulated trend tracking for live profiling metrics. + +This module provides trend tracking functionality for profiling metrics, +calculating direction indicators (up/down/stable) and managing associated +visual attributes like colors. +""" + +import curses +from typing import Dict, Literal, Any + +TrendDirection = Literal["up", "down", "stable"] + + +class TrendTracker: + """ + Tracks metric trends over time and provides visual indicators. + + This class encapsulates all logic for: + - Tracking previous values of metrics + - Calculating trend directions (up/down/stable) + - Determining visual attributes (colors) for trends + - Managing enable/disable state + + Example: + tracker = TrendTracker(colors_dict) + tracker.update("func1", "nsamples", 10) + trend = tracker.get_trend("func1", "nsamples") + color = tracker.get_color("func1", "nsamples") + """ + + # Threshold for determining if a value has changed significantly + CHANGE_THRESHOLD = 0.001 + + def __init__(self, colors: Dict[str, int], enabled: bool = True): + """ + Initialize the trend tracker. + + Args: + colors: Dictionary containing color attributes including + 'trend_up', 'trend_down', 'trend_stable' + enabled: Whether trend tracking is initially enabled + """ + self._previous_values: Dict[Any, Dict[str, float]] = {} + self._enabled = enabled + self._colors = colors + + @property + def enabled(self) -> bool: + """Whether trend tracking is enabled.""" + return self._enabled + + def toggle(self) -> bool: + """ + Toggle trend tracking on/off. + + Returns: + New enabled state + """ + self._enabled = not self._enabled + return self._enabled + + def set_enabled(self, enabled: bool) -> None: + """Set trend tracking enabled state.""" + self._enabled = enabled + + def update(self, key: Any, metric: str, value: float) -> TrendDirection: + """ + Update a metric value and calculate its trend. 
+ + Args: + key: Identifier for the entity (e.g., function) + metric: Name of the metric (e.g., 'nsamples', 'tottime') + value: Current value of the metric + + Returns: + Trend direction: 'up', 'down', or 'stable' + """ + # Initialize storage for this key if needed + if key not in self._previous_values: + self._previous_values[key] = {} + + # Get previous value, defaulting to current if not tracked yet + prev_value = self._previous_values[key].get(metric, value) + + # Calculate trend + if value > prev_value + self.CHANGE_THRESHOLD: + trend = "up" + elif value < prev_value - self.CHANGE_THRESHOLD: + trend = "down" + else: + trend = "stable" + + # Update previous value for next iteration + self._previous_values[key][metric] = value + + return trend + + def get_trend(self, key: Any, metric: str) -> TrendDirection: + """ + Get the current trend for a metric without updating. + + Args: + key: Identifier for the entity + metric: Name of the metric + + Returns: + Trend direction, or 'stable' if not tracked + """ + # This would require storing trends separately, which we don't do + # For now, return stable if not found + return "stable" + + def get_color(self, trend: TrendDirection) -> int: + """ + Get the color attribute for a trend direction. + + Args: + trend: The trend direction + + Returns: + Curses color attribute (or A_NORMAL if disabled) + """ + if not self._enabled: + return curses.A_NORMAL + + if trend == "up": + return self._colors.get("trend_up", curses.A_BOLD) + elif trend == "down": + return self._colors.get("trend_down", curses.A_BOLD) + else: # stable + return self._colors.get("trend_stable", curses.A_NORMAL) + + def update_metrics(self, key: Any, metrics: Dict[str, float]) -> Dict[str, TrendDirection]: + """ + Update multiple metrics at once and get their trends. 
+
+        Args:
+            key: Identifier for the entity
+            metrics: Dictionary of metric_name -> value
+
+        Returns:
+            Dictionary of metric_name -> trend_direction
+        """
+        trends = {}
+        for metric, value in metrics.items():
+            trends[metric] = self.update(key, metric, value)
+        return trends
+
+    def clear(self) -> None:
+        """Clear all tracked values (useful on stats reset)."""
+        self._previous_values.clear()
+
+    def __repr__(self) -> str:
+        """String representation for debugging."""
+        status = "enabled" if self._enabled else "disabled"
+        tracked = len(self._previous_values)
+        return f"TrendTracker({status}, tracking {tracked} entities)"
diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py
new file mode 100644
index 00000000000..2af8caa2c2f
--- /dev/null
+++ b/Lib/profiling/sampling/live_collector/widgets.py
@@ -0,0 +1,963 @@
+"""Widget classes for the live profiling collector UI."""
+
+import curses
+import time
+from abc import ABC, abstractmethod
+
+from .constants import (
+    TOP_FUNCTIONS_DISPLAY_COUNT,
+    MIN_FUNC_NAME_WIDTH,
+    MAX_FUNC_NAME_WIDTH,
+    WIDTH_THRESHOLD_SAMPLE_PCT,
+    WIDTH_THRESHOLD_TOTTIME,
+    WIDTH_THRESHOLD_CUMUL_PCT,
+    WIDTH_THRESHOLD_CUMTIME,
+    MICROSECONDS_PER_SECOND,
+    DISPLAY_UPDATE_INTERVAL,
+    MIN_BAR_WIDTH,
+    MAX_SAMPLE_RATE_BAR_WIDTH,
+    MAX_EFFICIENCY_BAR_WIDTH,
+    MIN_SAMPLE_RATE_FOR_SCALING,
+    FOOTER_LINES,
+    FINISHED_BANNER_EXTRA_LINES,
+)
+from ..constants import (
+    THREAD_STATUS_HAS_GIL,
+    THREAD_STATUS_ON_CPU,
+    THREAD_STATUS_UNKNOWN,
+    THREAD_STATUS_GIL_REQUESTED,
+    PROFILING_MODE_CPU,
+    PROFILING_MODE_GIL,
+    PROFILING_MODE_WALL,
+)
+
+
+class Widget(ABC):
+    """Base class for UI widgets."""
+
+    def __init__(self, display, colors):
+        """
+        Initialize widget.
+
+        Args:
+            display: DisplayInterface implementation
+            colors: Dictionary of color attributes
+        """
+        self.display = display
+        self.colors = colors
+
+    @abstractmethod
+    def render(self, line, width, **kwargs):
+        """
+        Render the widget starting at the given line.
+
+        Args:
+            line: Starting line number
+            width: Available width
+            **kwargs: Additional rendering parameters
+
+        Returns:
+            Next available line number after rendering
+        """
+        pass
+
+    def add_str(self, line, col, text, attr=0):
+        """Add a string to the display at the specified position."""
+        self.display.add_str(line, col, text, attr)
+
+
+class ProgressBarWidget(Widget):
+    """Reusable progress bar widget."""
+
+    def render(self, line, width, **kwargs):
+        """Render is not used for progress bars - use render_bar instead."""
+        raise NotImplementedError("Use render_bar method instead")
+
+    def render_bar(
+        self, filled, total, max_width, fill_char="█", empty_char="░"
+    ):
+        """
+        Render a progress bar and return the bar string and its length.
+
+        Args:
+            filled: Current filled amount
+            total: Total amount (max value)
+            max_width: Maximum width for the bar
+            fill_char: Character to use for filled portion
+            empty_char: Character to use for empty portion
+
+        Returns:
+            Tuple of (bar_string, bar_length)
+        """
+        bar_width = max_width
+        normalized = min(filled / max(total, 1), 1.0)
+        bar_fill = int(normalized * bar_width)
+
+        bar = "["
+        for i in range(bar_width):
+            if i < bar_fill:
+                bar += fill_char
+            else:
+                bar += empty_char
+        bar += "]"
+        return bar, len(bar)
+
+
+class HeaderWidget(Widget):
+    """Widget for rendering the header section (lines 0-8)."""
+
+    def __init__(self, display, colors, collector):
+        """
+        Initialize header widget. 
+ + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing stats + """ + super().__init__(display, colors) + self.collector = collector + self.progress_bar = ProgressBarWidget(display, colors) + + def render(self, line, width, **kwargs): + """ + Render the complete header section. + + Args: + line: Starting line number + width: Available width + kwargs: Must contain 'elapsed' key + + Returns: + Next available line number + """ + elapsed = kwargs["elapsed"] + + line = self.draw_header_info(line, width, elapsed) + line = self.draw_sample_stats(line, width, elapsed) + line = self.draw_efficiency_bar(line, width) + line = self.draw_thread_status(line, width) + line = self.draw_function_stats( + line, width, kwargs.get("stats_list", []) + ) + line = self.draw_top_functions( + line, width, kwargs.get("stats_list", []) + ) + + # Show prominent finished banner if profiling is complete + if self.collector.finished: + line = self.draw_finished_banner(line, width) + + # Separator + A_DIM = self.display.get_attr("A_DIM") + separator = "─" * (width - 1) + self.add_str(line, 0, separator[: width - 1], A_DIM) + line += 1 + + return line + + def format_uptime(self, elapsed): + """Format elapsed time as uptime string.""" + uptime_sec = int(elapsed) + hours = uptime_sec // 3600 + minutes = (uptime_sec % 3600) // 60 + seconds = uptime_sec % 60 + if hours > 0: + return f"{hours}h{minutes:02d}m{seconds:02d}s" + else: + return f"{minutes}m{seconds:02d}s" + + def draw_header_info(self, line, width, elapsed): + """Draw the header information line with PID, uptime, time, and interval.""" + # Draw title + A_BOLD = self.display.get_attr("A_BOLD") + title = "Tachyon Profiler" + self.add_str(line, 0, title, A_BOLD | self.colors["cyan"]) + line += 1 + + current_time = self.collector.current_time_display + uptime = self.format_uptime(elapsed) + + # Calculate display refresh rate + refresh_hz = ( + 1.0 / self.collector.display_update_interval if self.collector.display_update_interval > 0 else 0 + ) + + # Get current view mode and thread display + if self.collector.view_mode == "ALL": + thread_name = "ALL" + thread_color = self.colors["green"] + else: + # PER_THREAD mode + if self.collector.current_thread_index < len( + self.collector.thread_ids + ): + thread_id = self.collector.thread_ids[ + self.collector.current_thread_index + ] + num_threads = len(self.collector.thread_ids) + thread_name = f"{thread_id} ({self.collector.current_thread_index + 1}/{num_threads})" + thread_color = self.colors["magenta"] + else: + thread_name = "ALL" + thread_color = self.colors["green"] + + header_parts = [ + ("PID: ", curses.A_BOLD), + (f"{self.collector.pid}", self.colors["cyan"]), + (" │ ", curses.A_DIM), + ("Thread: ", curses.A_BOLD), + (thread_name, thread_color), + (" │ ", curses.A_DIM), + ("Uptime: ", curses.A_BOLD), + (uptime, self.colors["green"]), + (" │ ", curses.A_DIM), + ("Time: ", curses.A_BOLD), + (current_time, self.colors["yellow"]), + (" │ ", curses.A_DIM), + ("Interval: ", curses.A_BOLD), + ( + f"{self.collector.sample_interval_usec}µs", + self.colors["magenta"], + ), + (" │ ", curses.A_DIM), + ("Display: ", curses.A_BOLD), + (f"{refresh_hz:.1f}Hz", self.colors["cyan"]), + ] + + col = 0 + for text, attr in header_parts: + if col < width - 1: + self.add_str(line, col, text, attr) + col += len(text) + return line + 1 + + def format_rate_with_units(self, rate_hz): + """Format a rate in Hz with appropriate units (Hz, KHz, MHz).""" 
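+        # For example (input values chosen purely for illustration):
+        #   format_rate_with_units(250.0)     -> "250.0Hz"
+        #   format_rate_with_units(12_500)    -> "12.5KHz"
+        #   format_rate_with_units(3_400_000) -> "3.4MHz"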
+ if rate_hz >= 1_000_000: + return f"{rate_hz / 1_000_000:.1f}MHz" + elif rate_hz >= 1_000: + return f"{rate_hz / 1_000:.1f}KHz" + else: + return f"{rate_hz:.1f}Hz" + + def draw_sample_stats(self, line, width, elapsed): + """Draw sample statistics with visual progress bar.""" + sample_rate = ( + self.collector.total_samples / elapsed if elapsed > 0 else 0 + ) + + # Update max sample rate + if sample_rate > self.collector.max_sample_rate: + self.collector.max_sample_rate = sample_rate + + col = 0 + self.add_str(line, col, "Samples: ", curses.A_BOLD) + col += 9 + self.add_str( + line, + col, + f"{self.collector.total_samples:>8}", + self.colors["cyan"], + ) + col += 8 + self.add_str( + line, col, f" total ({sample_rate:>7.1f}/s) ", curses.A_NORMAL + ) + col += 23 + + # Draw sample rate bar + target_rate = ( + MICROSECONDS_PER_SECOND / self.collector.sample_interval_usec + ) + + # Show current/target ratio with percentage + if sample_rate > 0 and target_rate > 0: + percentage = min((sample_rate / target_rate) * 100, 100) + current_formatted = self.format_rate_with_units(sample_rate) + target_formatted = self.format_rate_with_units(target_rate) + + if percentage >= 99.5: # Show 100% when very close + rate_label = f" {current_formatted}/{target_formatted} (100%)" + else: + rate_label = f" {current_formatted}/{target_formatted} ({percentage:>4.1f}%)" + else: + target_formatted = self.format_rate_with_units(target_rate) + rate_label = f" target: {target_formatted}" + + available_width = width - col - len(rate_label) - 3 + + if available_width >= MIN_BAR_WIDTH: + bar_width = min(MAX_SAMPLE_RATE_BAR_WIDTH, available_width) + # Use target rate as the reference, with a minimum for scaling + reference_rate = max(target_rate, MIN_SAMPLE_RATE_FOR_SCALING) + normalized_rate = min(sample_rate / reference_rate, 1.0) + bar_fill = int(normalized_rate * bar_width) + + bar = "[" + for i in range(bar_width): + bar += "█" if i < bar_fill else "░" + bar += "]" + self.add_str(line, col, bar, self.colors["green"]) + col += len(bar) + + if col + len(rate_label) < width - 1: + self.add_str(line, col + 1, rate_label, curses.A_DIM) + return line + 1 + + def draw_efficiency_bar(self, line, width): + """Draw sample efficiency bar showing success/failure rates.""" + success_pct = ( + self.collector.successful_samples + / max(1, self.collector.total_samples) + ) * 100 + failed_pct = ( + self.collector.failed_samples + / max(1, self.collector.total_samples) + ) * 100 + + col = 0 + self.add_str(line, col, "Efficiency:", curses.A_BOLD) + col += 11 + + label = f" {success_pct:>5.2f}% good, {failed_pct:>4.2f}% failed" + available_width = width - col - len(label) - 3 + + if available_width >= MIN_BAR_WIDTH: + bar_width = min(MAX_EFFICIENCY_BAR_WIDTH, available_width) + success_fill = int( + ( + self.collector.successful_samples + / max(1, self.collector.total_samples) + ) + * bar_width + ) + failed_fill = bar_width - success_fill + + self.add_str(line, col, "[", curses.A_NORMAL) + col += 1 + if success_fill > 0: + self.add_str( + line, col, "█" * success_fill, self.colors["green"] + ) + col += success_fill + if failed_fill > 0: + self.add_str(line, col, "█" * failed_fill, self.colors["red"]) + col += failed_fill + self.add_str(line, col, "]", curses.A_NORMAL) + col += 1 + + self.add_str(line, col + 1, label, curses.A_NORMAL) + return line + 1 + + def _add_percentage_stat( + self, line, col, value, label, color, add_separator=False + ): + """Add a percentage stat to the display. 
+ + Args: + line: Line number + col: Starting column + value: Percentage value + label: Label text + color: Color attribute + add_separator: Whether to add separator before the stat + + Returns: + Updated column position + """ + if add_separator: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + + self.add_str(line, col, f"{value:>4.1f}", color) + col += 4 + self.add_str(line, col, f"% {label}", curses.A_NORMAL) + col += len(label) + 2 + + return col + + def draw_thread_status(self, line, width): + """Draw thread status statistics and GC information.""" + # Get status counts for current view mode + thread_data = self.collector._get_current_thread_data() + status_counts = thread_data.as_status_dict() if thread_data else self.collector.thread_status_counts + + # Calculate percentages + total_threads = max(1, status_counts["total"]) + pct_on_gil = (status_counts["has_gil"] / total_threads) * 100 + pct_off_gil = 100.0 - pct_on_gil + pct_gil_requested = (status_counts["gil_requested"] / total_threads) * 100 + + # Get GC percentage based on view mode + if thread_data: + total_samples = max(1, thread_data.sample_count) + pct_gc = (thread_data.gc_frame_samples / total_samples) * 100 + else: + total_samples = max(1, self.collector.total_samples) + pct_gc = (self.collector.gc_frame_samples / total_samples) * 100 + + col = 0 + self.add_str(line, col, "Threads: ", curses.A_BOLD) + col += 11 + + # Show GIL stats only if mode is not GIL (GIL mode filters to only GIL holders) + if self.collector.mode != PROFILING_MODE_GIL: + col = self._add_percentage_stat( + line, col, pct_on_gil, "on gil", self.colors["green"] + ) + col = self._add_percentage_stat( + line, + col, + pct_off_gil, + "off gil", + self.colors["red"], + add_separator=True, + ) + + # Show "waiting for gil" only if mode is not GIL + if self.collector.mode != PROFILING_MODE_GIL and col < width - 30: + col = self._add_percentage_stat( + line, + col, + pct_gil_requested, + "waiting for gil", + self.colors["yellow"], + add_separator=True, + ) + + # Always show GC stats + if col < width - 15: + col = self._add_percentage_stat( + line, + col, + pct_gc, + "GC", + self.colors["magenta"], + add_separator=(col > 11), + ) + + return line + 1 + + def draw_function_stats(self, line, width, stats_list): + """Draw function statistics summary.""" + result_set = self.collector._get_current_result_source() + total_funcs = len(result_set) + funcs_shown = len(stats_list) + executing_funcs = sum( + 1 for f in result_set.values() if f.get("direct_calls", 0) > 0 + ) + stack_only = total_funcs - executing_funcs + + col = 0 + self.add_str(line, col, "Functions: ", curses.A_BOLD) + col += 11 + self.add_str(line, col, f"{total_funcs:>5}", self.colors["cyan"]) + col += 5 + self.add_str(line, col, " total", curses.A_NORMAL) + col += 6 + + if col < width - 25: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + self.add_str( + line, col, f"{executing_funcs:>5}", self.colors["green"] + ) + col += 5 + self.add_str(line, col, " exec", curses.A_NORMAL) + col += 5 + + if col < width - 25: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + self.add_str(line, col, f"{stack_only:>5}", self.colors["yellow"]) + col += 5 + self.add_str(line, col, " stack", curses.A_NORMAL) + col += 6 + + if col < width - 20: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + self.add_str( + line, col, f"{funcs_shown:>5}", self.colors["magenta"] + ) + col += 5 + self.add_str(line, col, " shown", curses.A_NORMAL) + return line + 1 + + def draw_top_functions(self, 
line, width, stats_list): + """Draw top N hottest functions.""" + col = 0 + self.add_str( + line, + col, + f"Top {TOP_FUNCTIONS_DISPLAY_COUNT}: ", + curses.A_BOLD, + ) + col += 11 + + top_by_samples = sorted( + stats_list, key=lambda x: x["direct_calls"], reverse=True + ) + emojis = ["🥇", "🥈", "🥉"] + medal_colors = [ + self.colors["red"], + self.colors["yellow"], + self.colors["green"], + ] + + displayed = 0 + for func_data in top_by_samples: + if displayed >= TOP_FUNCTIONS_DISPLAY_COUNT: + break + if col >= width - 20: + break + if func_data["direct_calls"] == 0: + continue + + func_name = func_data["func"][2] + func_pct = ( + func_data["direct_calls"] + / max(1, self.collector.total_samples) + ) * 100 + + # Medal emoji + if col + 3 < width - 15: + self.add_str( + line, col, emojis[displayed] + " ", medal_colors[displayed] + ) + col += 3 + + # Function name (truncate to fit) + available_for_name = width - col - 15 + max_name_len = min(25, max(5, available_for_name)) + if len(func_name) > max_name_len: + func_name = func_name[: max_name_len - 3] + "..." + + if col + len(func_name) < width - 10: + self.add_str(line, col, func_name, medal_colors[displayed]) + col += len(func_name) + + pct_str = ( + f" ({func_pct:.1f}%)" + if func_pct >= 0.1 + else f" ({func_data['direct_calls']})" + ) + self.add_str(line, col, pct_str, curses.A_DIM) + col += len(pct_str) + + displayed += 1 + + if displayed < 3 and col < width - 30: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + + if displayed == 0 and col < width - 25: + self.add_str(line, col, "(collecting samples...)", curses.A_DIM) + + return line + 1 + + def draw_finished_banner(self, line, width): + """Draw a prominent banner when profiling is finished.""" + A_REVERSE = self.display.get_attr("A_REVERSE") + A_BOLD = self.display.get_attr("A_BOLD") + + # Add blank line for separation + line += 1 + + # Create the banner message + message = " ✓ PROFILING COMPLETE - Final Results Below - Press 'q' to Quit " + + # Center the message and fill the width with reverse video + if len(message) < width - 1: + padding_total = width - len(message) - 1 + padding_left = padding_total // 2 + padding_right = padding_total - padding_left + full_message = " " * padding_left + message + " " * padding_right + else: + full_message = message[: width - 1] + + # Draw the banner with reverse video and bold + self.add_str( + line, 0, full_message, A_REVERSE | A_BOLD | self.colors["green"] + ) + line += 1 + + # Add blank line for separation + line += 1 + + return line + + +class TableWidget(Widget): + """Widget for rendering column headers and data rows.""" + + def __init__(self, display, colors, collector): + """ + Initialize table widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing stats + """ + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """ + Render column headers and data rows. 
+ + Args: + line: Starting line number + width: Available width + kwargs: Must contain 'height' and 'stats_list' keys + + Returns: + Next available line number + """ + height = kwargs["height"] + stats_list = kwargs["stats_list"] + + # Draw column headers + line, show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + self.draw_column_headers(line, width) + ) + column_flags = ( + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) + + # Draw data rows + line = self.draw_stats_rows( + line, height, width, stats_list, column_flags + ) + + return line + + def draw_column_headers(self, line, width): + """Draw column headers with sort indicators.""" + col = 0 + + # Determine which columns to show based on width + show_sample_pct = width >= WIDTH_THRESHOLD_SAMPLE_PCT + show_tottime = width >= WIDTH_THRESHOLD_TOTTIME + show_cumul_pct = width >= WIDTH_THRESHOLD_CUMUL_PCT + show_cumtime = width >= WIDTH_THRESHOLD_CUMTIME + + sorted_header = self.colors["sorted_header"] + normal_header = self.colors["normal_header"] + + # Determine which column is sorted + sort_col = { + "nsamples": 0, + "sample_pct": 1, + "tottime": 2, + "cumul_pct": 3, + "cumtime": 4, + }.get(self.collector.sort_by, -1) + + # Column 0: nsamples + attr = sorted_header if sort_col == 0 else normal_header + text = f"{'▼nsamples' if sort_col == 0 else 'nsamples':>13}" + self.add_str(line, col, text, attr) + col += 15 + + # Column 1: sample % + if show_sample_pct: + attr = sorted_header if sort_col == 1 else normal_header + text = f"{'▼%' if sort_col == 1 else '%':>5}" + self.add_str(line, col, text, attr) + col += 7 + + # Column 2: tottime + if show_tottime: + attr = sorted_header if sort_col == 2 else normal_header + text = f"{'▼tottime' if sort_col == 2 else 'tottime':>10}" + self.add_str(line, col, text, attr) + col += 12 + + # Column 3: cumul % + if show_cumul_pct: + attr = sorted_header if sort_col == 3 else normal_header + text = f"{'▼%' if sort_col == 3 else '%':>5}" + self.add_str(line, col, text, attr) + col += 7 + + # Column 4: cumtime + if show_cumtime: + attr = sorted_header if sort_col == 4 else normal_header + text = f"{'▼cumtime' if sort_col == 4 else 'cumtime':>10}" + self.add_str(line, col, text, attr) + col += 12 + + # Remaining headers + if col < width - 15: + remaining_space = width - col - 1 + func_width = min( + MAX_FUNC_NAME_WIDTH, + max(MIN_FUNC_NAME_WIDTH, remaining_space // 2), + ) + self.add_str( + line, col, f"{'function':<{func_width}}", normal_header + ) + col += func_width + 2 + + if col < width - 10: + self.add_str(line, col, "file:line", normal_header) + + return ( + line + 1, + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) + + def draw_stats_rows(self, line, height, width, stats_list, column_flags): + """Draw the statistics data rows.""" + show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + column_flags + ) + + # Get color attributes from the colors dict (already initialized) + color_samples = self.colors.get("color_samples", curses.A_NORMAL) + color_file = self.colors.get("color_file", curses.A_NORMAL) + color_func = self.colors.get("color_func", curses.A_NORMAL) + + # Get trend tracker for color decisions + trend_tracker = self.collector._trend_tracker + + for stat in stats_list: + if line >= height - FOOTER_LINES: + break + + func = stat["func"] + direct_calls = stat["direct_calls"] + cumulative_calls = stat["cumulative_calls"] + total_time = stat["total_time"] + cumulative_time = stat["cumulative_time"] + trends = 
stat.get("trends", {}) + + sample_pct = ( + (direct_calls / self.collector.total_samples * 100) + if self.collector.total_samples > 0 + else 0 + ) + cum_pct = ( + (cumulative_calls / self.collector.total_samples * 100) + if self.collector.total_samples > 0 + else 0 + ) + + # Helper function to get trend color for a specific column + def get_trend_color(column_name): + trend = trends.get(column_name, "stable") + if trend_tracker is not None: + return trend_tracker.get_color(trend) + return curses.A_NORMAL + + filename, lineno, funcname = func[0], func[1], func[2] + samples_str = f"{direct_calls}/{cumulative_calls}" + col = 0 + + # Samples column - apply trend color based on nsamples trend + nsamples_color = get_trend_color("nsamples") + self.add_str(line, col, f"{samples_str:>13}", nsamples_color) + col += 15 + + # Sample % column + if show_sample_pct: + sample_pct_color = get_trend_color("sample_pct") + self.add_str(line, col, f"{sample_pct:>5.1f}", sample_pct_color) + col += 7 + + # Total time column + if show_tottime: + tottime_color = get_trend_color("tottime") + self.add_str(line, col, f"{total_time:>10.3f}", tottime_color) + col += 12 + + # Cumul % column + if show_cumul_pct: + cumul_pct_color = get_trend_color("cumul_pct") + self.add_str(line, col, f"{cum_pct:>5.1f}", cumul_pct_color) + col += 7 + + # Cumul time column + if show_cumtime: + cumtime_color = get_trend_color("cumtime") + self.add_str(line, col, f"{cumulative_time:>10.3f}", cumtime_color) + col += 12 + + # Function name column + if col < width - 15: + remaining_space = width - col - 1 + func_width = min( + MAX_FUNC_NAME_WIDTH, + max(MIN_FUNC_NAME_WIDTH, remaining_space // 2), + ) + + func_display = funcname + if len(funcname) > func_width: + func_display = funcname[: func_width - 3] + "..." + func_display = f"{func_display:<{func_width}}" + self.add_str(line, col, func_display, color_func) + col += func_width + 2 + + # File:line column + if col < width - 10: + simplified_path = self.collector.simplify_path(filename) + file_line = f"{simplified_path}:{lineno}" + remaining_width = width - col - 1 + self.add_str( + line, col, file_line[:remaining_width], color_file + ) + + line += 1 + + return line + + +class FooterWidget(Widget): + """Widget for rendering the footer section (legend and controls).""" + + def __init__(self, display, colors, collector): + """ + Initialize footer widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing state + """ + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """ + Render the footer at the specified position. 
+ + Args: + line: Starting line number (should be height - 2) + width: Available width + + Returns: + Next available line number + """ + A_DIM = self.display.get_attr("A_DIM") + A_BOLD = self.display.get_attr("A_BOLD") + + # Legend line + legend = "nsamples: direct/cumulative (direct=executing, cumulative=on stack)" + self.add_str(line, 0, legend[: width - 1], A_DIM) + line += 1 + + # Controls line with status + sort_names = { + "tottime": "Total Time", + "nsamples": "Direct Samples", + "cumtime": "Cumulative Time", + "sample_pct": "Sample %", + "cumul_pct": "Cumulative %", + } + sort_display = sort_names.get( + self.collector.sort_by, self.collector.sort_by + ) + + # Build status indicators + status = [] + if self.collector.finished: + status.append("[PROFILING FINISHED - Press 'q' to quit]") + elif self.collector.paused: + status.append("[PAUSED]") + if self.collector.filter_pattern: + status.append( + f"[Filter: {self.collector.filter_pattern} (c to clear)]" + ) + # Show trend colors status if disabled + if self.collector._trend_tracker is not None and not self.collector._trend_tracker.enabled: + status.append("[Trend colors: OFF]") + status_str = " ".join(status) + " " if status else "" + + if self.collector.finished: + footer = f"{status_str}" + else: + footer = f"{status_str}Sort: {sort_display} | 't':mode 'x':trends ←→:thread 'h':help 'q':quit" + self.add_str( + line, + 0, + footer[: width - 1], + A_BOLD + if (self.collector.paused or self.collector.finished) + else A_DIM, + ) + + return line + 1 + + def render_filter_input_prompt(self, line, width): + """Draw the filter input prompt at the bottom of the screen.""" + A_BOLD = self.display.get_attr("A_BOLD") + A_REVERSE = self.display.get_attr("A_REVERSE") + + # Draw prompt on last line + prompt = f"Function filter: {self.collector.filter_input_buffer}_" + self.add_str(line, 0, prompt[: width - 1], A_REVERSE | A_BOLD) + + +class HelpWidget(Widget): + """Widget for rendering the help screen overlay.""" + + def render(self, line, width, **kwargs): + """ + Render the help screen. + + Args: + line: Starting line number (ignored, help is centered) + width: Available width + kwargs: Must contain 'height' key + + Returns: + Next available line number (not used for overlays) + """ + height = kwargs["height"] + A_BOLD = self.display.get_attr("A_BOLD") + A_NORMAL = self.display.get_attr("A_NORMAL") + + help_lines = [ + ("Tachyon Profiler - Interactive Commands", A_BOLD), + ("", A_NORMAL), + ("Navigation & Display:", A_BOLD), + (" s - Cycle through sort modes (forward)", A_NORMAL), + (" S - Cycle through sort modes (backward)", A_NORMAL), + (" t - Toggle view mode (ALL / per-thread)", A_NORMAL), + (" x - Toggle trend colors (on/off)", A_NORMAL), + (" ← → ↑ ↓ - Navigate threads (in per-thread mode)", A_NORMAL), + (" + - Faster display refresh rate", A_NORMAL), + (" - - Slower display refresh rate", A_NORMAL), + ("", A_NORMAL), + ("Control:", A_BOLD), + (" p - Freeze display (snapshot)", A_NORMAL), + (" r - Reset all statistics", A_NORMAL), + ("", A_NORMAL), + ("Filtering:", A_BOLD), + (" / - Enter function filter (substring)", A_NORMAL), + (" c - Clear filter", A_NORMAL), + (" ESC - Cancel filter input", A_NORMAL), + ("", A_NORMAL), + ("Other:", A_BOLD), + (" h or ? 
- Show/hide this help", A_NORMAL), + (" q - Quit profiler", A_NORMAL), + ("", A_NORMAL), + ("Press any key to close this help screen", A_BOLD), + ] + + start_line = (height - len(help_lines)) // 2 + for i, (text, attr) in enumerate(help_lines): + if start_line + i < height - 1: + col = 2 # Left-align with small margin + self.add_str(start_line + i, col, text[: width - 3], attr) + + return line # Not used for overlays diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index e06dbf40aa1..4fe3acfa9ff 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -1,6 +1,7 @@ import collections import marshal +from _colorize import ANSIColors from .collector import Collector @@ -41,8 +42,14 @@ def _process_frames(self, frames): self.callers[callee][caller] += 1 def collect(self, stack_frames): - for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle): - self._process_frames(frames) + if stack_frames and hasattr(stack_frames[0], "awaited_by"): + # Async frame processing + for frames, thread_id, task_id in self._iter_async_frames(stack_frames): + self._process_frames(frames) + else: + # Regular frame processing + for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=self.skip_idle): + self._process_frames(frames) def export(self, filename): self.create_stats() @@ -70,3 +77,342 @@ def create_stats(self): cumulative, callers, ) + + def print_stats(self, sort=-1, limit=None, show_summary=True, mode=None): + """Print formatted statistics to stdout.""" + import pstats + from .constants import PROFILING_MODE_CPU + + # Create stats object + stats = pstats.SampledStats(self).strip_dirs() + if not stats.stats: + print("No samples were collected.") + if mode == PROFILING_MODE_CPU: + print("This can happen in CPU mode when all threads are idle.") + return + + # Get the stats data + stats_list = [] + for func, ( + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats.stats.items(): + stats_list.append( + ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) + ) + + # Calculate total samples for percentage calculations (using direct_calls) + total_samples = sum( + direct_calls for _, direct_calls, _, _, _, _ in stats_list + ) + + # Sort based on the requested field + sort_field = sort + if sort_field == -1: # stdname + stats_list.sort(key=lambda x: str(x[0])) + elif sort_field == 0: # nsamples (direct samples) + stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls + elif sort_field == 1: # tottime + stats_list.sort(key=lambda x: x[3], reverse=True) # total_time + elif sort_field == 2: # cumtime + stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time + elif sort_field == 3: # sample% + stats_list.sort( + key=lambda x: (x[1] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # direct_calls percentage + ) + elif sort_field == 4: # cumul% + stats_list.sort( + key=lambda x: (x[2] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # cumulative_calls percentage + ) + elif sort_field == 5: # nsamples (cumulative samples) + stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls + + # Apply limit if specified + if limit is not None: + stats_list = stats_list[:limit] + + # Determine the best unit for time columns based on maximum values + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in stats_list), 
default=0 + ) + max_cumulative_time = max( + (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), + default=0, + ) + + total_time_unit, total_time_scale = self._determine_best_unit(max_total_time) + cumulative_time_unit, cumulative_time_scale = self._determine_best_unit( + max_cumulative_time + ) + + # Define column widths for consistent alignment + col_widths = { + "nsamples": 15, # "nsamples" column (inline/cumulative format) + "sample_pct": 8, # "sample%" column + "tottime": max(12, len(f"tottime ({total_time_unit})")), + "cum_pct": 8, # "cumul%" column + "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), + } + + # Print header with colors and proper alignment + print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") + + header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" + header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" + header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime ({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" + header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" + header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" + header_filename = ( + f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" + ) + + print( + f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" + ) + + # Print each line with proper alignment + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Calculate percentages + sample_pct = ( + (direct_calls / total_samples * 100) if total_samples > 0 else 0 + ) + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + + # Format values with proper alignment - always use A/B format + nsamples_str = f"{direct_calls}/{cumulative_calls}" + nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" + sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" + tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" + cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" + cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" + + # Format the function name with colors + func_name = ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + # Print the formatted line with consistent spacing + print( + f"{nsamples_str} {sample_pct_str} {tottime} {cum_pct_str} {cumtime} {func_name}" + ) + + # Print legend + print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") + print( + f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct executing / on call stack)" + ) + print( + f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total samples this function was directly executing" + ) + print( + f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" + ) + print( + f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" + ) + print( + f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" + ) + print( + f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and 
name" + ) + + # Print summary of interesting functions if enabled + if show_summary and stats_list: + self._print_summary(stats_list, total_samples) + + @staticmethod + def _determine_best_unit(max_value): + """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" + if max_value >= 1.0: + return "s", 1.0 + elif max_value >= 0.001: + return "ms", 1000.0 + else: + return "μs", 1000000.0 + + def _print_summary(self, stats_list, total_samples): + """Print summary of interesting functions.""" + print( + f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" + ) + + # Aggregate stats by fully qualified function name (ignoring line numbers) + func_aggregated = {} + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Use filename:function_name as the key to get fully qualified name + qualified_name = f"{func[0]}:{func[2]}" + if qualified_name not in func_aggregated: + func_aggregated[qualified_name] = [ + 0, + 0, + 0, + 0, + ] # direct_calls, cumulative_calls, total_time, cumulative_time + func_aggregated[qualified_name][0] += direct_calls + func_aggregated[qualified_name][1] += cumulative_calls + func_aggregated[qualified_name][2] += total_time + func_aggregated[qualified_name][3] += cumulative_time + + # Convert aggregated data back to list format for processing + aggregated_stats = [] + for qualified_name, ( + prim_calls, + total_calls, + total_time, + cumulative_time, + ) in func_aggregated.items(): + # Parse the qualified name back to filename and function name + if ":" in qualified_name: + filename, func_name = qualified_name.rsplit(":", 1) + else: + filename, func_name = "", qualified_name + # Create a dummy func tuple with filename and function name for display + dummy_func = (filename, "", func_name) + aggregated_stats.append( + ( + dummy_func, + prim_calls, + total_calls, + total_time, + cumulative_time, + {}, + ) + ) + + # Determine best units for summary metrics + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in aggregated_stats), + default=0, + ) + max_cumulative_time = max( + ( + cumulative_time + for _, _, _, _, cumulative_time, _ in aggregated_stats + ), + default=0, + ) + + total_unit, total_scale = self._determine_best_unit(max_total_time) + cumulative_unit, cumulative_scale = self._determine_best_unit( + max_cumulative_time + ) + + def _format_func_name(func): + """Format function name with colors.""" + return ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + def _print_top_functions(stats_list, title, key_func, format_line, n=3): + """Print top N functions sorted by key_func with formatted output.""" + print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") + sorted_stats = sorted(stats_list, key=key_func, reverse=True) + for stat in sorted_stats[:n]: + if line := format_line(stat): + print(f" {line}") + + # Functions with highest direct/cumulative ratio (hot spots) + def format_hotspots(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > 0: + ratio = direct_calls / cumulative_calls + direct_pct = ( + (direct_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{ratio:.3f} direct/cumulative ratio, " + f"{direct_pct:.1f}% direct samples: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions 
with Highest Direct/Cumulative Ratio (Hot Spots)", + key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, + format_line=format_hotspots, + ) + + # Functions with highest call frequency (cumulative/direct difference) + def format_call_frequency(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if cumulative_calls > direct_calls: + call_frequency = cumulative_calls - direct_calls + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{call_frequency:d} indirect calls, " + f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Frequency (Indirect Calls)", + key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) + format_line=format_call_frequency, + ) + + # Functions with highest cumulative-to-direct multiplier (call magnification) + def format_call_magnification(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > direct_calls: + multiplier = cumulative_calls / direct_calls + indirect_calls = cumulative_calls - direct_calls + return ( + f"{multiplier:.1f}x call magnification, " + f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Magnification (Cumulative/Direct)", + key_func=lambda x: (x[2] / x[1]) + if x[1] > 0 + else 0, # Sort by cumulative/direct ratio + format_line=format_call_magnification, + ) diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 713931a639d..dd4ea1edbf6 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -1,9 +1,6 @@ -import argparse import _remote_debugging import os import pstats -import socket -import subprocess import statistics import sys import sysconfig @@ -13,151 +10,48 @@ from .pstats_collector import PstatsCollector from .stack_collector import CollapsedStackCollector, FlamegraphCollector +from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector +from .constants import ( + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_ALL, +) +try: + from .live_collector import LiveStatsCollector +except ImportError: + LiveStatsCollector = None _FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None -# Profiling mode constants -PROFILING_MODE_WALL = 0 -PROFILING_MODE_CPU = 1 -PROFILING_MODE_GIL = 2 -PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks - - -def _parse_mode(mode_string): - """Convert mode string to mode constant.""" - mode_map = { - "wall": PROFILING_MODE_WALL, - "cpu": PROFILING_MODE_CPU, - "gil": PROFILING_MODE_GIL, - } - return mode_map[mode_string] -_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. -Supports the following target modes: - - -p PID: Profile an existing process by PID - - -m MODULE [ARGS...]: Profile a module as python -m module ... 
- - filename [ARGS...]: Profile the specified script by running it in a subprocess - -Supports the following output formats: - - --pstats: Detailed profiling statistics with sorting options - - --collapsed: Stack traces for generating flamegraphs - - --flamegraph Interactive HTML flamegraph visualization (requires web browser) - -Examples: - # Profile process 1234 for 10 seconds with default settings - python -m profiling.sampling -p 1234 - - # Profile a script by running it in a subprocess - python -m profiling.sampling myscript.py arg1 arg2 - - # Profile a module by running it as python -m module in a subprocess - python -m profiling.sampling -m mymodule arg1 arg2 - - # Profile with custom interval and duration, save to file - python -m profiling.sampling -i 50 -d 30 -o profile.stats -p 1234 - - # Generate collapsed stacks for flamegraph - python -m profiling.sampling --collapsed -p 1234 - - # Generate a HTML flamegraph - python -m profiling.sampling --flamegraph -p 1234 - - # Profile all threads, sort by total time - python -m profiling.sampling -a --sort-tottime -p 1234 - - # Profile for 1 minute with 1ms sampling interval - python -m profiling.sampling -i 1000 -d 60 -p 1234 - - # Show only top 20 functions sorted by direct samples - python -m profiling.sampling --sort-nsamples -l 20 -p 1234 - - # Profile all threads and save collapsed stacks - python -m profiling.sampling -a --collapsed -o stacks.txt -p 1234 - - # Profile with real-time sampling statistics - python -m profiling.sampling --realtime-stats -p 1234 - - # Sort by sample percentage to find most sampled functions - python -m profiling.sampling --sort-sample-pct -p 1234 - - # Sort by cumulative samples to find functions most on call stack - python -m profiling.sampling --sort-nsamples-cumul -p 1234""" - - -# Constants for socket synchronization -_SYNC_TIMEOUT = 5.0 -_PROCESS_KILL_TIMEOUT = 2.0 -_READY_MESSAGE = b"ready" -_RECV_BUFFER_SIZE = 1024 - - -def _run_with_sync(original_cmd): - """Run a command with socket-based synchronization and return the process.""" - # Create a TCP socket for synchronization with better socket options - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock: - # Set SO_REUSEADDR to avoid "Address already in use" errors - sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port - sync_port = sync_sock.getsockname()[1] - sync_sock.listen(1) - sync_sock.settimeout(_SYNC_TIMEOUT) - - # Get current working directory to preserve it - cwd = os.getcwd() - - # Build command using the sync coordinator - target_args = original_cmd[1:] # Remove python executable - cmd = (sys.executable, "-m", "profiling.sampling._sync_coordinator", str(sync_port), cwd) + tuple(target_args) - - # Start the process with coordinator - process = subprocess.Popen(cmd) - - try: - # Wait for ready signal with timeout - with sync_sock.accept()[0] as conn: - ready_signal = conn.recv(_RECV_BUFFER_SIZE) - - if ready_signal != _READY_MESSAGE: - raise RuntimeError(f"Invalid ready signal received: {ready_signal!r}") - - except socket.timeout: - # If we timeout, kill the process and raise an error - if process.poll() is None: - process.terminate() - try: - process.wait(timeout=_PROCESS_KILL_TIMEOUT) - except subprocess.TimeoutExpired: - process.kill() - process.wait() - raise RuntimeError("Process failed to signal readiness within timeout") - - return process - - - class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads, *, 
mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True): + def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True, collect_stats=False): self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads + self.mode = mode # Store mode for later use + self.collect_stats = collect_stats if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc, - skip_non_matching_threads=skip_non_matching_threads + skip_non_matching_threads=skip_non_matching_threads, cache_frames=True, + stats=collect_stats ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc, - skip_non_matching_threads=skip_non_matching_threads + skip_non_matching_threads=skip_non_matching_threads, cache_frames=True, + stats=collect_stats ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) self.total_samples = 0 self.realtime_stats = False - def sample(self, collector, duration_sec=10): + def sample(self, collector, duration_sec=10, *, async_aware=False): sample_interval_sec = self.sample_interval_usec / 1_000_000 running_time = 0 num_samples = 0 @@ -166,63 +60,87 @@ def sample(self, collector, duration_sec=10): last_sample_time = start_time realtime_update_interval = 1.0 # Update every second last_realtime_update = start_time + interrupted = False - while running_time < duration_sec: - current_time = time.perf_counter() - if next_time < current_time: - try: - stack_frames = self.unwinder.get_stack_trace() - collector.collect(stack_frames) - except ProcessLookupError: - duration_sec = current_time - start_time + try: + while running_time < duration_sec: + # Check if live collector wants to stop + if hasattr(collector, 'running') and not collector.running: break - except (RuntimeError, UnicodeDecodeError, MemoryError, OSError): - errors += 1 - except Exception as e: - if not self._is_process_running(): + + current_time = time.perf_counter() + if next_time < current_time: + try: + if async_aware == "all": + stack_frames = self.unwinder.get_all_awaited_by() + elif async_aware == "running": + stack_frames = self.unwinder.get_async_stack_trace() + else: + stack_frames = self.unwinder.get_stack_trace() + collector.collect(stack_frames) + except ProcessLookupError: + duration_sec = current_time - start_time break - raise e from None + except (RuntimeError, UnicodeDecodeError, MemoryError, OSError): + collector.collect_failed_sample() + errors += 1 + except Exception as e: + if not self._is_process_running(): + break + raise e from None - # Track actual sampling intervals for real-time stats - if num_samples > 0: - actual_interval = current_time - last_sample_time - self.sample_intervals.append( - 1.0 / actual_interval - ) # Convert to Hz - self.total_samples += 1 + # Track actual sampling intervals for real-time stats + if num_samples > 0: + actual_interval = current_time - last_sample_time + self.sample_intervals.append( + 1.0 / actual_interval + ) # Convert to Hz + self.total_samples += 1 - # Print real-time statistics if enabled - if ( - self.realtime_stats - and (current_time - last_realtime_update) - >= realtime_update_interval - ): - self._print_realtime_stats() - last_realtime_update = current_time + # Print real-time statistics if enabled + if 
( + self.realtime_stats + and (current_time - last_realtime_update) + >= realtime_update_interval + ): + self._print_realtime_stats() + last_realtime_update = current_time - last_sample_time = current_time - num_samples += 1 - next_time += sample_interval_sec + last_sample_time = current_time + num_samples += 1 + next_time += sample_interval_sec + running_time = time.perf_counter() - start_time + except KeyboardInterrupt: + interrupted = True running_time = time.perf_counter() - start_time + print("Interrupted by user.") # Clear real-time stats line if it was being displayed if self.realtime_stats and len(self.sample_intervals) > 0: print() # Add newline after real-time stats - sample_rate = num_samples / running_time + sample_rate = num_samples / running_time if running_time > 0 else 0 error_rate = (errors / num_samples) * 100 if num_samples > 0 else 0 + expected_samples = int(duration_sec / sample_interval_sec) + missed_samples = (expected_samples - num_samples) / expected_samples * 100 if expected_samples > 0 else 0 - print(f"Captured {num_samples} samples in {running_time:.2f} seconds") - print(f"Sample rate: {sample_rate:.2f} samples/sec") - print(f"Error rate: {error_rate:.2f}%") + # Don't print stats for live mode (curses is handling display) + is_live_mode = LiveStatsCollector is not None and isinstance(collector, LiveStatsCollector) + if not is_live_mode: + print(f"Captured {num_samples} samples in {running_time:.2f} seconds") + print(f"Sample rate: {sample_rate:.2f} samples/sec") + print(f"Error rate: {error_rate:.2f}%") + + # Print unwinder stats if stats collection is enabled + if self.collect_stats: + self._print_unwinder_stats() # Pass stats to flamegraph collector if it's the right type if hasattr(collector, 'set_stats'): - collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate) + collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, missed_samples, mode=self.mode) - expected_samples = int(duration_sec / sample_interval_sec) - if num_samples < expected_samples: + if num_samples < expected_samples and not is_live_mode and not interrupted: print( f"Warning: missed {expected_samples - num_samples} samples " f"from the expected total of {expected_samples} " @@ -265,712 +183,225 @@ def _print_realtime_stats(self): (1.0 / min_hz) * 1_000_000 if min_hz > 0 else 0 ) # Max time = Min Hz + # Build cache stats string if stats collection is enabled + cache_stats_str = "" + if self.collect_stats: + try: + stats = self.unwinder.get_stats() + hits = stats.get('frame_cache_hits', 0) + partial = stats.get('frame_cache_partial_hits', 0) + misses = stats.get('frame_cache_misses', 0) + total = hits + partial + misses + if total > 0: + hit_pct = (hits + partial) / total * 100 + cache_stats_str = f" {ANSIColors.MAGENTA}Cache: {hit_pct:.1f}% ({hits}+{partial}/{misses}){ANSIColors.RESET}" + except RuntimeError: + pass + # Clear line and print stats print( - f"\r\033[K{ANSIColors.BOLD_BLUE}Real-time sampling stats:{ANSIColors.RESET} " - f"{ANSIColors.YELLOW}Mean: {mean_hz:.1f}Hz ({mean_us_per_sample:.2f}µs){ANSIColors.RESET} " - f"{ANSIColors.GREEN}Min: {min_hz:.1f}Hz ({max_us_per_sample:.2f}µs){ANSIColors.RESET} " - f"{ANSIColors.RED}Max: {max_hz:.1f}Hz ({min_us_per_sample:.2f}µs){ANSIColors.RESET} " - f"{ANSIColors.CYAN}Samples: {self.total_samples}{ANSIColors.RESET}", + f"\r\033[K{ANSIColors.BOLD_BLUE}Stats:{ANSIColors.RESET} " + f"{ANSIColors.YELLOW}{mean_hz:.1f}Hz ({mean_us_per_sample:.1f}µs){ANSIColors.RESET} " + 
f"{ANSIColors.GREEN}Min: {min_hz:.1f}Hz{ANSIColors.RESET} " + f"{ANSIColors.RED}Max: {max_hz:.1f}Hz{ANSIColors.RESET} " + f"{ANSIColors.CYAN}N={self.total_samples}{ANSIColors.RESET}" + f"{cache_stats_str}", end="", flush=True, ) + def _print_unwinder_stats(self): + """Print unwinder statistics including cache performance.""" + try: + stats = self.unwinder.get_stats() + except RuntimeError: + return # Stats not enabled -def _determine_best_unit(max_value): - """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" - if max_value >= 1.0: - return "s", 1.0 - elif max_value >= 0.001: - return "ms", 1000.0 - else: - return "μs", 1000000.0 + print(f"\n{ANSIColors.BOLD_BLUE}{'='*50}{ANSIColors.RESET}") + print(f"{ANSIColors.BOLD_BLUE}Unwinder Statistics:{ANSIColors.RESET}") + # Frame cache stats + total_samples = stats.get('total_samples', 0) + frame_cache_hits = stats.get('frame_cache_hits', 0) + frame_cache_partial_hits = stats.get('frame_cache_partial_hits', 0) + frame_cache_misses = stats.get('frame_cache_misses', 0) + total_lookups = frame_cache_hits + frame_cache_partial_hits + frame_cache_misses -def print_sampled_stats( - stats, sort=-1, limit=None, show_summary=True, sample_interval_usec=100 -): - # Get the stats data - stats_list = [] - for func, ( - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats.stats.items(): - stats_list.append( - ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) - ) - - # Calculate total samples for percentage calculations (using direct_calls) - total_samples = sum( - direct_calls for _, direct_calls, _, _, _, _ in stats_list - ) - - # Sort based on the requested field - sort_field = sort - if sort_field == -1: # stdname - stats_list.sort(key=lambda x: str(x[0])) - elif sort_field == 0: # nsamples (direct samples) - stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls - elif sort_field == 1: # tottime - stats_list.sort(key=lambda x: x[3], reverse=True) # total_time - elif sort_field == 2: # cumtime - stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time - elif sort_field == 3: # sample% - stats_list.sort( - key=lambda x: (x[1] / total_samples * 100) - if total_samples > 0 - else 0, - reverse=True, # direct_calls percentage - ) - elif sort_field == 4: # cumul% - stats_list.sort( - key=lambda x: (x[2] / total_samples * 100) - if total_samples > 0 - else 0, - reverse=True, # cumulative_calls percentage - ) - elif sort_field == 5: # nsamples (cumulative samples) - stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls - - # Apply limit if specified - if limit is not None: - stats_list = stats_list[:limit] - - # Determine the best unit for time columns based on maximum values - max_total_time = max( - (total_time for _, _, _, total_time, _, _ in stats_list), default=0 - ) - max_cumulative_time = max( - (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), - default=0, - ) - - total_time_unit, total_time_scale = _determine_best_unit(max_total_time) - cumulative_time_unit, cumulative_time_scale = _determine_best_unit( - max_cumulative_time - ) - - # Define column widths for consistent alignment - col_widths = { - "nsamples": 15, # "nsamples" column (inline/cumulative format) - "sample_pct": 8, # "sample%" column - "tottime": max(12, len(f"tottime ({total_time_unit})")), - "cum_pct": 8, # "cumul%" column - "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), - } - - # Print header with colors and proper 
alignment - print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") - - header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" - header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" - header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime ({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" - header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" - header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" - header_filename = ( - f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" - ) - - print( - f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" - ) - - # Print each line with proper alignment - for ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats_list: # Calculate percentages - sample_pct = ( - (direct_calls / total_samples * 100) if total_samples > 0 else 0 - ) - cum_pct = ( - (cumulative_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) + hits_pct = (frame_cache_hits / total_lookups * 100) if total_lookups > 0 else 0 + partial_pct = (frame_cache_partial_hits / total_lookups * 100) if total_lookups > 0 else 0 + misses_pct = (frame_cache_misses / total_lookups * 100) if total_lookups > 0 else 0 - # Format values with proper alignment - always use A/B format - nsamples_str = f"{direct_calls}/{cumulative_calls}" - nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" - sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" - tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" - cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" - cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" + print(f" {ANSIColors.CYAN}Frame Cache:{ANSIColors.RESET}") + print(f" Total samples: {total_samples:,}") + print(f" Full hits: {frame_cache_hits:,} ({ANSIColors.GREEN}{hits_pct:.1f}%{ANSIColors.RESET})") + print(f" Partial hits: {frame_cache_partial_hits:,} ({ANSIColors.YELLOW}{partial_pct:.1f}%{ANSIColors.RESET})") + print(f" Misses: {frame_cache_misses:,} ({ANSIColors.RED}{misses_pct:.1f}%{ANSIColors.RESET})") - # Format the function name with colors - func_name = ( - f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" - f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" - f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" - ) + # Frame read stats + frames_from_cache = stats.get('frames_read_from_cache', 0) + frames_from_memory = stats.get('frames_read_from_memory', 0) + total_frames = frames_from_cache + frames_from_memory + cache_frame_pct = (frames_from_cache / total_frames * 100) if total_frames > 0 else 0 + memory_frame_pct = (frames_from_memory / total_frames * 100) if total_frames > 0 else 0 - # Print the formatted line with consistent spacing - print( - f"{nsamples_str} {sample_pct_str} {tottime} {cum_pct_str} {cumtime} {func_name}" - ) + print(f" {ANSIColors.CYAN}Frame Reads:{ANSIColors.RESET}") + print(f" From cache: {frames_from_cache:,} ({ANSIColors.GREEN}{cache_frame_pct:.1f}%{ANSIColors.RESET})") + print(f" From memory: {frames_from_memory:,} ({ANSIColors.RED}{memory_frame_pct:.1f}%{ANSIColors.RESET})") - # Print legend - print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") - print( - f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct 
executing / on call stack)" - ) - print( - f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total samples this function was directly executing" - ) - print( - f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" - ) - print( - f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" - ) - print( - f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" - ) - print( - f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and name" - ) + # Code object cache stats + code_hits = stats.get('code_object_cache_hits', 0) + code_misses = stats.get('code_object_cache_misses', 0) + total_code = code_hits + code_misses + code_hits_pct = (code_hits / total_code * 100) if total_code > 0 else 0 + code_misses_pct = (code_misses / total_code * 100) if total_code > 0 else 0 - def _format_func_name(func): - """Format function name with colors.""" - return ( - f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" - f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" - f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" - ) + print(f" {ANSIColors.CYAN}Code Object Cache:{ANSIColors.RESET}") + print(f" Hits: {code_hits:,} ({ANSIColors.GREEN}{code_hits_pct:.1f}%{ANSIColors.RESET})") + print(f" Misses: {code_misses:,} ({ANSIColors.RED}{code_misses_pct:.1f}%{ANSIColors.RESET})") - def _print_top_functions(stats_list, title, key_func, format_line, n=3): - """Print top N functions sorted by key_func with formatted output.""" - print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") - sorted_stats = sorted(stats_list, key=key_func, reverse=True) - for stat in sorted_stats[:n]: - if line := format_line(stat): - print(f" {line}") + # Memory operations + memory_reads = stats.get('memory_reads', 0) + memory_bytes = stats.get('memory_bytes_read', 0) + if memory_bytes >= 1024 * 1024: + memory_str = f"{memory_bytes / (1024 * 1024):.1f} MB" + elif memory_bytes >= 1024: + memory_str = f"{memory_bytes / 1024:.1f} KB" + else: + memory_str = f"{memory_bytes} B" + print(f" {ANSIColors.CYAN}Memory:{ANSIColors.RESET}") + print(f" Read operations: {memory_reads:,} ({memory_str})") - # Print summary of interesting functions if enabled - if show_summary and stats_list: - print( - f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" - ) - - # Aggregate stats by fully qualified function name (ignoring line numbers) - func_aggregated = {} - for ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats_list: - # Use filename:function_name as the key to get fully qualified name - qualified_name = f"{func[0]}:{func[2]}" - if qualified_name not in func_aggregated: - func_aggregated[qualified_name] = [ - 0, - 0, - 0, - 0, - ] # direct_calls, cumulative_calls, total_time, cumulative_time - func_aggregated[qualified_name][0] += direct_calls - func_aggregated[qualified_name][1] += cumulative_calls - func_aggregated[qualified_name][2] += total_time - func_aggregated[qualified_name][3] += cumulative_time - - # Convert aggregated data back to list format for processing - aggregated_stats = [] - for qualified_name, ( - prim_calls, - total_calls, - total_time, - cumulative_time, - ) in func_aggregated.items(): - # Parse the qualified name back to filename and function name - if ":" in qualified_name: - filename, func_name = qualified_name.rsplit(":", 1) - else: - filename, 
func_name = "", qualified_name - # Create a dummy func tuple with filename and function name for display - dummy_func = (filename, "", func_name) - aggregated_stats.append( - ( - dummy_func, - prim_calls, - total_calls, - total_time, - cumulative_time, - {}, - ) - ) - - # Determine best units for summary metrics - max_total_time = max( - (total_time for _, _, _, total_time, _, _ in aggregated_stats), - default=0, - ) - max_cumulative_time = max( - ( - cumulative_time - for _, _, _, _, cumulative_time, _ in aggregated_stats - ), - default=0, - ) - - total_unit, total_scale = _determine_best_unit(max_total_time) - cumulative_unit, cumulative_scale = _determine_best_unit( - max_cumulative_time - ) - - # Functions with highest direct/cumulative ratio (hot spots) - def format_hotspots(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if direct_calls > 0 and cumulative_calls > 0: - ratio = direct_calls / cumulative_calls - direct_pct = ( - (direct_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - return ( - f"{ratio:.3f} direct/cumulative ratio, " - f"{direct_pct:.1f}% direct samples: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Direct/Cumulative Ratio (Hot Spots)", - key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, - format_line=format_hotspots, - ) - - # Functions with highest call frequency (cumulative/direct difference) - def format_call_frequency(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if cumulative_calls > direct_calls: - call_frequency = cumulative_calls - direct_calls - cum_pct = ( - (cumulative_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - return ( - f"{call_frequency:d} indirect calls, " - f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Call Frequency (Indirect Calls)", - key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) - format_line=format_call_frequency, - ) - - # Functions with highest cumulative-to-direct multiplier (call magnification) - def format_call_magnification(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if direct_calls > 0 and cumulative_calls > direct_calls: - multiplier = cumulative_calls / direct_calls - indirect_calls = cumulative_calls - direct_calls - return ( - f"{multiplier:.1f}x call magnification, " - f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Call Magnification (Cumulative/Direct)", - key_func=lambda x: (x[2] / x[1]) - if x[1] > 0 - else 0, # Sort by cumulative/direct ratio - format_line=format_call_magnification, - ) + # Stale invalidations + stale_invalidations = stats.get('stale_cache_invalidations', 0) + if stale_invalidations > 0: + print(f" {ANSIColors.YELLOW}Stale cache invalidations: {stale_invalidations}{ANSIColors.RESET}") def sample( pid, + collector, *, - sort=2, - sample_interval_usec=100, duration_sec=10, - filename=None, all_threads=False, - limit=None, - show_summary=True, - output_format="pstats", realtime_stats=False, mode=PROFILING_MODE_WALL, + async_aware=None, native=False, gc=True, ): + """Sample a process using the provided collector. 
+ + Args: + pid: Process ID to sample + collector: Collector instance to use for gathering samples + duration_sec: How long to sample for (seconds) + all_threads: Whether to sample all threads + realtime_stats: Whether to print real-time sampling statistics + mode: Profiling mode - WALL (all samples), CPU (only when on CPU), + GIL (only when holding GIL), ALL (includes GIL and CPU status) + native: Whether to include native frames + gc: Whether to include GC frames + + Returns: + The collector with collected samples + """ + # Get sample interval from collector + sample_interval_usec = collector.sample_interval_usec + # PROFILING_MODE_ALL implies no skipping at all if mode == PROFILING_MODE_ALL: skip_non_matching_threads = False - skip_idle = False else: - # Determine skip settings based on output format and mode - skip_non_matching_threads = output_format != "gecko" - skip_idle = mode != PROFILING_MODE_WALL + # For most modes, skip non-matching threads + # Gecko collector overrides this by setting skip_idle=False + skip_non_matching_threads = True profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads, mode=mode, native=native, gc=gc, - skip_non_matching_threads=skip_non_matching_threads + pid, + sample_interval_usec, + all_threads=all_threads, + mode=mode, + native=native, + gc=gc, + skip_non_matching_threads=skip_non_matching_threads, + collect_stats=realtime_stats, ) profiler.realtime_stats = realtime_stats - collector = None - match output_format: - case "pstats": - collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle) - case "collapsed": - collector = CollapsedStackCollector(skip_idle=skip_idle) - filename = filename or f"collapsed.{pid}.txt" - case "flamegraph": - collector = FlamegraphCollector(skip_idle=skip_idle) - filename = filename or f"flamegraph.{pid}.html" - case "gecko": - # Gecko format never skips idle threads to show full thread states - collector = GeckoCollector(skip_idle=False) - filename = filename or f"gecko.{pid}.json" - case _: - raise ValueError(f"Invalid output format: {output_format}") + # Run the sampling + profiler.sample(collector, duration_sec, async_aware=async_aware) - profiler.sample(collector, duration_sec) + return collector - if output_format == "pstats" and not filename: - stats = pstats.SampledStats(collector).strip_dirs() - if not stats.stats: - print("No samples were collected.") - if mode == PROFILING_MODE_CPU: - print("This can happen in CPU mode when all threads are idle.") - else: - print_sampled_stats( - stats, sort, limit, show_summary, sample_interval_usec - ) + +def sample_live( + pid, + collector, + *, + duration_sec=10, + all_threads=False, + realtime_stats=False, + mode=PROFILING_MODE_WALL, + async_aware=None, + native=False, + gc=True, +): + """Sample a process in live/interactive mode with curses TUI. 
+ + Args: + pid: Process ID to sample + collector: LiveStatsCollector instance + duration_sec: How long to sample for (seconds) + all_threads: Whether to sample all threads + realtime_stats: Whether to print real-time sampling statistics + mode: Profiling mode - WALL (all samples), CPU (only when on CPU), + GIL (only when holding GIL), ALL (includes GIL and CPU status) + native: Whether to include native frames + gc: Whether to include GC frames + + Returns: + The collector with collected samples + """ + import curses + + # Get sample interval from collector + sample_interval_usec = collector.sample_interval_usec + + # PROFILING_MODE_ALL implies no skipping at all + if mode == PROFILING_MODE_ALL: + skip_non_matching_threads = False else: - collector.export(filename) + skip_non_matching_threads = True - -def _validate_collapsed_format_args(args, parser): - # Check for incompatible pstats options - invalid_opts = [] - - # Get list of pstats-specific options - pstats_options = {"sort": None, "limit": None, "no_summary": False} - - # Find the default values from the argument definitions - for action in parser._actions: - if action.dest in pstats_options and hasattr(action, "default"): - pstats_options[action.dest] = action.default - - # Check if any pstats-specific options were provided by comparing with defaults - for opt, default in pstats_options.items(): - if getattr(args, opt) != default: - invalid_opts.append(opt.replace("no_", "")) - - if invalid_opts: - parser.error( - f"The following options are only valid with --pstats format: {', '.join(invalid_opts)}" - ) - - # Set default output filename for collapsed format only if we have a PID - # For module/script execution, this will be set later with the subprocess PID - if not args.outfile and args.pid is not None: - args.outfile = f"collapsed.{args.pid}.txt" - - -def wait_for_process_and_sample(pid, sort_value, args): - """Sample the process immediately since it has already signaled readiness.""" - # Set default filename with subprocess PID if not already set - filename = args.outfile - if not filename: - if args.format == "collapsed": - filename = f"collapsed.{pid}.txt" - elif args.format == "gecko": - filename = f"gecko.{pid}.json" - - mode = _parse_mode(args.mode) - - sample( + profiler = SampleProfiler( pid, - sort=sort_value, - sample_interval_usec=args.interval, - duration_sec=args.duration, - filename=filename, - all_threads=args.all_threads, - limit=args.limit, - show_summary=not args.no_summary, - output_format=args.format, - realtime_stats=args.realtime_stats, + sample_interval_usec, + all_threads=all_threads, mode=mode, - native=args.native, - gc=args.gc, + native=native, + gc=gc, + skip_non_matching_threads=skip_non_matching_threads, + collect_stats=realtime_stats, ) + profiler.realtime_stats = realtime_stats - -def main(): - # Create the main parser - parser = argparse.ArgumentParser( - description=_HELP_DESCRIPTION, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - - # Target selection - target_group = parser.add_mutually_exclusive_group(required=False) - target_group.add_argument( - "-p", "--pid", type=int, help="Process ID to sample" - ) - target_group.add_argument( - "-m", "--module", - help="Run and profile a module as python -m module [ARGS...]" - ) - parser.add_argument( - "args", - nargs=argparse.REMAINDER, - help="Script to run and profile, with optional arguments" - ) - - # Sampling options - sampling_group = parser.add_argument_group("Sampling configuration") - sampling_group.add_argument( - "-i", - 
"--interval", - type=int, - default=100, - help="Sampling interval in microseconds (default: 100)", - ) - sampling_group.add_argument( - "-d", - "--duration", - type=int, - default=10, - help="Sampling duration in seconds (default: 10)", - ) - sampling_group.add_argument( - "-a", - "--all-threads", - action="store_true", - help="Sample all threads in the process instead of just the main thread", - ) - sampling_group.add_argument( - "--realtime-stats", - action="store_true", - help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", - ) - sampling_group.add_argument( - "--native", - action="store_true", - help="Include artificial \"\" frames to denote calls to non-Python code.", - ) - sampling_group.add_argument( - "--no-gc", - action="store_false", - dest="gc", - help="Don't include artificial \"\" frames to denote active garbage collection.", - ) - - # Mode options - mode_group = parser.add_argument_group("Mode options") - mode_group.add_argument( - "--mode", - choices=["wall", "cpu", "gil"], - default="wall", - help="Sampling mode: wall (all threads), cpu (only CPU-running threads), gil (only GIL-holding threads) (default: wall)", - ) - - # Output format selection - output_group = parser.add_argument_group("Output options") - output_format = output_group.add_mutually_exclusive_group() - output_format.add_argument( - "--pstats", - action="store_const", - const="pstats", - dest="format", - default="pstats", - help="Generate pstats output (default)", - ) - output_format.add_argument( - "--collapsed", - action="store_const", - const="collapsed", - dest="format", - help="Generate collapsed stack traces for flamegraphs", - ) - output_format.add_argument( - "--flamegraph", - action="store_const", - const="flamegraph", - dest="format", - help="Generate HTML flamegraph visualization", - ) - output_format.add_argument( - "--gecko", - action="store_const", - const="gecko", - dest="format", - help="Generate Gecko format for Firefox Profiler", - ) - - output_group.add_argument( - "-o", - "--outfile", - help="Save output to a file (if omitted, prints to stdout for pstats, " - "or saves to collapsed..txt or flamegraph..html for the " - "respective output formats)" - ) - - # pstats-specific options - pstats_group = parser.add_argument_group("pstats format options") - sort_group = pstats_group.add_mutually_exclusive_group() - sort_group.add_argument( - "--sort-nsamples", - action="store_const", - const=0, - dest="sort", - help="Sort by number of direct samples (nsamples column)", - ) - sort_group.add_argument( - "--sort-tottime", - action="store_const", - const=1, - dest="sort", - help="Sort by total time (tottime column)", - ) - sort_group.add_argument( - "--sort-cumtime", - action="store_const", - const=2, - dest="sort", - help="Sort by cumulative time (cumtime column, default)", - ) - sort_group.add_argument( - "--sort-sample-pct", - action="store_const", - const=3, - dest="sort", - help="Sort by sample percentage (sample%% column)", - ) - sort_group.add_argument( - "--sort-cumul-pct", - action="store_const", - const=4, - dest="sort", - help="Sort by cumulative sample percentage (cumul%% column)", - ) - sort_group.add_argument( - "--sort-nsamples-cumul", - action="store_const", - const=5, - dest="sort", - help="Sort by cumulative samples (nsamples column, cumulative part)", - ) - sort_group.add_argument( - "--sort-name", - action="store_const", - const=-1, - dest="sort", - help="Sort by function name", - ) - - pstats_group.add_argument( - "-l", - "--limit", - type=int, 
- help="Limit the number of rows in the output", - default=15, - ) - pstats_group.add_argument( - "--no-summary", - action="store_true", - help="Disable the summary section in the output", - ) - - args = parser.parse_args() - - # Validate format-specific arguments - if args.format in ("collapsed", "gecko"): - _validate_collapsed_format_args(args, parser) - - # Validate that --mode is not used with --gecko - if args.format == "gecko" and args.mode != "wall": - parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).") - - sort_value = args.sort if args.sort is not None else 2 - - if args.module is not None and not args.module: - parser.error("argument -m/--module: expected one argument") - - # Validate that we have exactly one target type - # Note: args can be present with -m (module arguments) but not as standalone script - has_pid = args.pid is not None - has_module = args.module is not None - has_script = bool(args.args) and args.module is None - - target_count = sum([has_pid, has_module, has_script]) - - if target_count == 0: - parser.error("one of the arguments -p/--pid -m/--module or script name is required") - elif target_count > 1: - parser.error("only one target type can be specified: -p/--pid, -m/--module, or script") - - # Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice - if args.format == "gecko": - mode = PROFILING_MODE_ALL - else: - mode = _parse_mode(args.mode) - - if args.pid: - sample( - args.pid, - sample_interval_usec=args.interval, - duration_sec=args.duration, - filename=args.outfile, - all_threads=args.all_threads, - limit=args.limit, - sort=sort_value, - show_summary=not args.no_summary, - output_format=args.format, - realtime_stats=args.realtime_stats, - mode=mode, - native=args.native, - gc=args.gc, - ) - elif args.module or args.args: - if args.module: - cmd = (sys.executable, "-m", args.module, *args.args) - else: - cmd = (sys.executable, *args.args) - - # Use synchronized process startup - process = _run_with_sync(cmd) - - # Process has already signaled readiness, start sampling immediately + def curses_wrapper_func(stdscr): + collector.init_curses(stdscr) try: - wait_for_process_and_sample(process.pid, sort_value, args) + profiler.sample(collector, duration_sec, async_aware=async_aware) + # Mark as finished and keep the TUI running until user presses 'q' + collector.mark_finished() + # Keep processing input until user quits + while collector.running: + collector._handle_input() + time.sleep(0.05) # Small sleep to avoid busy waiting finally: - if process.poll() is None: - process.terminate() - try: - process.wait(timeout=2) - except subprocess.TimeoutExpired: - process.kill() - process.wait() + collector.cleanup_curses() -if __name__ == "__main__": - main() + try: + curses.wrapper(curses_wrapper_func) + except KeyboardInterrupt: + pass + + return collector diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index 1436811976a..1f766682858 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -6,19 +6,29 @@ import linecache import os +from ._css_utils import get_combined_css from .collector import Collector from .string_table import StringTable class StackTraceCollector(Collector): - def __init__(self, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False): + self.sample_interval_usec = sample_interval_usec self.skip_idle = skip_idle def 
collect(self, stack_frames, skip_idle=False): - for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle): - if not frames: - continue - self.process_frames(frames, thread_id) + if stack_frames and hasattr(stack_frames[0], "awaited_by"): + # Async-aware mode: process async task frames + for frames, thread_id, task_id in self._iter_async_frames(stack_frames): + if not frames: + continue + self.process_frames(frames, thread_id) + else: + # Sync-only mode + for frames, thread_id in self._iter_all_frames(stack_frames, skip_idle=skip_idle): + if not frames: + continue + self.process_frames(frames, thread_id) def process_frames(self, frames, thread_id): pass @@ -61,17 +71,67 @@ def __init__(self, *args, **kwargs): self.stats = {} self._root = {"samples": 0, "children": {}, "threads": set()} self._total_samples = 0 + self._sample_count = 0 # Track actual number of samples (not thread traces) self._func_intern = {} self._string_table = StringTable() self._all_threads = set() - def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None): + # Thread status statistics (similar to LiveStatsCollector) + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + self.samples_with_gc_frames = 0 + + # Per-thread statistics + self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}} + + def collect(self, stack_frames, skip_idle=False): + """Override to track thread status statistics before processing frames.""" + # Increment sample count once per sample + self._sample_count += 1 + + # Collect both aggregate and per-thread statistics using base method + status_counts, has_gc_frame, per_thread_stats = self._collect_thread_status_stats(stack_frames) + + # Merge aggregate status counts + for key in status_counts: + self.thread_status_counts[key] += status_counts[key] + + # Update aggregate GC frame count + if has_gc_frame: + self.samples_with_gc_frames += 1 + + # Merge per-thread statistics + for thread_id, stats in per_thread_stats.items(): + if thread_id not in self.per_thread_stats: + self.per_thread_stats[thread_id] = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + "gc_samples": 0, + } + for key, value in stats.items(): + self.per_thread_stats[thread_id][key] += value + + # Call parent collect to process frames + super().collect(stack_frames, skip_idle=skip_idle) + + def set_stats(self, sample_interval_usec, duration_sec, sample_rate, + error_rate=None, missed_samples=None, mode=None): """Set profiling statistics to include in flamegraph data.""" self.stats = { "sample_interval_usec": sample_interval_usec, "duration_sec": duration_sec, "sample_rate": sample_rate, - "error_rate": error_rate + "error_rate": error_rate, + "missed_samples": missed_samples, + "mode": mode } def export(self, filename): @@ -116,7 +176,6 @@ def _format_function_name(func): return f"{funcname} ({filename}:{lineno})" def _convert_to_flamegraph_format(self): - """Convert aggregated trie to d3-flamegraph format with string table optimization.""" if self._total_samples == 0: return { "name": self._string_table.intern("No Data"), @@ -177,6 +236,29 @@ def convert_children(children, min_samples): "strings": self._string_table.get_strings() } + # Calculate thread status percentages for display + total_threads = max(1, self.thread_status_counts["total"]) + thread_stats = { + "has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100, 
+ "on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100, + "gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100, + "gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100, + **self.thread_status_counts + } + + # Calculate per-thread statistics with percentages + per_thread_stats_with_pct = {} + total_samples_denominator = max(1, self._sample_count) + for thread_id, stats in self.per_thread_stats.items(): + total = max(1, stats["total"]) + per_thread_stats_with_pct[thread_id] = { + "has_gil_pct": (stats["has_gil"] / total) * 100, + "on_cpu_pct": (stats["on_cpu"] / total) * 100, + "gil_requested_pct": (stats["gil_requested"] / total) * 100, + "gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100, + **stats + } + # If we only have one root child, make it the root to avoid redundant level if len(root_children) == 1: main_child = root_children[0] @@ -184,7 +266,11 @@ def convert_children(children, min_samples): old_name = self._string_table.get_string(main_child["name"]) new_name = f"Program Root: {old_name}" main_child["name"] = self._string_table.intern(new_name) - main_child["stats"] = self.stats + main_child["stats"] = { + **self.stats, + "thread_stats": thread_stats, + "per_thread_stats": per_thread_stats_with_pct + } main_child["threads"] = sorted(list(self._all_threads)) main_child["strings"] = self._string_table.get_strings() return main_child @@ -193,7 +279,11 @@ def convert_children(children, min_samples): "name": self._string_table.intern("Program Root"), "value": total_samples, "children": root_children, - "stats": self.stats, + "stats": { + **self.stats, + "thread_stats": thread_stats, + "per_thread_stats": per_thread_stats_with_pct + }, "threads": sorted(list(self._all_threads)), "strings": self._string_table.get_strings() } @@ -250,9 +340,9 @@ def _create_flamegraph_html(self, data): fg_js_path = d3_flame_graph_dir / "d3-flamegraph.min.js" fg_tooltip_js_path = d3_flame_graph_dir / "d3-flamegraph-tooltip.min.js" - html_template = (template_dir / "flamegraph_template.html").read_text(encoding="utf-8") - css_content = (template_dir / "flamegraph.css").read_text(encoding="utf-8") - js_content = (template_dir / "flamegraph.js").read_text(encoding="utf-8") + html_template = (template_dir / "_flamegraph_assets" / "flamegraph_template.html").read_text(encoding="utf-8") + css_content = get_combined_css("flamegraph") + js_content = (template_dir / "_flamegraph_assets" / "flamegraph.js").read_text(encoding="utf-8") # Inline first-party CSS/JS html_template = html_template.replace( diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index 20dd561d1c1..c2ca8e25abe 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -375,7 +375,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): # less significant byte is a bit index in the chunk (just like the # CHARSET matching). 
-    charmap = bytes(charmap) # should be hashable
+    charmap = charmap.take_bytes() # should be hashable
     comps = {}
     mapping = bytearray(256)
     block = 0
diff --git a/Lib/shelve.py b/Lib/shelve.py
index 1010be1e09d..9f6296667fd 100644
--- a/Lib/shelve.py
+++ b/Lib/shelve.py
@@ -57,7 +57,6 @@
 """

 from pickle import DEFAULT_PROTOCOL, dumps, loads
-from io import BytesIO

 import collections.abc

diff --git a/Lib/ssl.py b/Lib/ssl.py
index 7ad7969a821..67a2990b281 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -185,7 +185,7 @@ class _TLSContentType:
 class _TLSAlertType:
     """Alert types for TLSContentType.ALERT messages

-    See RFC 8466, section B.2
+    See RFC 8446, section B.2
     """
     CLOSE_NOTIFY = 0
     UNEXPECTED_MESSAGE = 10
diff --git a/Lib/subprocess.py b/Lib/subprocess.py
index 79251bd5310..4d5ab6fbff0 100644
--- a/Lib/subprocess.py
+++ b/Lib/subprocess.py
@@ -1613,6 +1613,10 @@ def _readerthread(self, fh, buffer):
         fh.close()


+    def _writerthread(self, input):
+        self._stdin_write(input)
+
+
     def _communicate(self, input, endtime, orig_timeout):
         # Start reader threads feeding into a list hanging off of this
         # object, unless they've already been started.
@@ -1631,8 +1635,23 @@ def _communicate(self, input, endtime, orig_timeout):
             self.stderr_thread.daemon = True
             self.stderr_thread.start()

-        if self.stdin:
-            self._stdin_write(input)
+        # Start writer thread to send input to stdin, unless already
+        # started. The thread writes input and closes stdin when done,
+        # or continues in the background on timeout.
+        if self.stdin and not hasattr(self, "_stdin_thread"):
+            self._stdin_thread = \
+                    threading.Thread(target=self._writerthread,
+                                     args=(input,))
+            self._stdin_thread.daemon = True
+            self._stdin_thread.start()
+
+        # Wait for the writer thread, or time out. If we time out, the
+        # thread remains writing and the fd left open in case the user
+        # calls communicate again.
+        if hasattr(self, "_stdin_thread"):
+            self._stdin_thread.join(self._remaining_time(endtime))
+            if self._stdin_thread.is_alive():
+                raise TimeoutExpired(self.args, orig_timeout)

         # Wait for the reader threads, or time out. If we time out, the
         # threads remain reading and the fds left open in case the user
@@ -2077,6 +2096,10 @@ def _communicate(self, input, endtime, orig_timeout):
                 self.stdin.flush()
             except BrokenPipeError:
                 pass  # communicate() must ignore BrokenPipeError.
+            except ValueError:
+                # ignore ValueError: I/O operation on closed file.
+ if not self.stdin.closed: + raise if not input: try: self.stdin.close() @@ -2102,10 +2125,13 @@ def _communicate(self, input, endtime, orig_timeout): self._save_input(input) if self._input: - input_view = memoryview(self._input) + if not isinstance(self._input, memoryview): + input_view = memoryview(self._input) + else: + input_view = self._input.cast("b") # byte input required with _PopenSelector() as selector: - if self.stdin and input: + if self.stdin and not self.stdin.closed and self._input: selector.register(self.stdin, selectors.EVENT_WRITE) if self.stdout and not self.stdout.closed: selector.register(self.stdout, selectors.EVENT_READ) @@ -2138,7 +2164,7 @@ def _communicate(self, input, endtime, orig_timeout): selector.unregister(key.fileobj) key.fileobj.close() else: - if self._input_offset >= len(self._input): + if self._input_offset >= len(input_view): selector.unregister(key.fileobj) key.fileobj.close() elif key.fileobj in (self.stdout, self.stderr): diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 0f9c5c22225..d03eb1dfb25 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -39,6 +39,7 @@ from test.support import socket_helper from test.support import threading_helper from test.support import warnings_helper +from test.support import subTests from test.support.script_helper import assert_python_failure, assert_python_ok # Skip tests if _multiprocessing wasn't built. @@ -1204,7 +1205,7 @@ def test_put(self): @classmethod def _test_get(cls, queue, child_can_start, parent_can_continue): child_can_start.wait() - #queue.put(1) + queue.put(1) queue.put(2) queue.put(3) queue.put(4) @@ -1229,15 +1230,16 @@ def test_get(self): child_can_start.set() parent_can_continue.wait() - time.sleep(DELTA) + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if not queue_empty(queue): + break self.assertEqual(queue_empty(queue), False) - # Hangs unexpectedly, remove for now - #self.assertEqual(queue.get(), 1) + self.assertEqual(queue.get_nowait(), 1) self.assertEqual(queue.get(True, None), 2) self.assertEqual(queue.get(True), 3) self.assertEqual(queue.get(timeout=1), 4) - self.assertEqual(queue.get_nowait(), 5) + self.assertEqual(queue.get(), 5) self.assertEqual(queue_empty(queue), True) @@ -4382,6 +4384,19 @@ def test_copy(self): self.assertEqual(bar.z, 2 ** 33) +def resource_tracker_format_subtests(func): + """Run given test using both resource tracker communication formats""" + def _inner(self, *args, **kwargs): + tracker = resource_tracker._resource_tracker + for use_simple_format in False, True: + with ( + self.subTest(use_simple_format=use_simple_format), + unittest.mock.patch.object( + tracker, '_use_simple_format', use_simple_format) + ): + func(self, *args, **kwargs) + return _inner + @unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory") @hashlib_helper.requires_hashdigest('sha256') class _TestSharedMemory(BaseTestCase): @@ -4661,6 +4676,7 @@ def test_shared_memory_SharedMemoryServer_ignores_sigint(self): smm.shutdown() @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + @resource_tracker_format_subtests def test_shared_memory_SharedMemoryManager_reuses_resource_tracker(self): # bpo-36867: test that a SharedMemoryManager uses the # same resource_tracker process as its parent. 
@@ -4912,6 +4928,7 @@ def test_shared_memory_cleaned_after_process_termination(self): "shared_memory objects to clean up at shutdown", err) @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + @resource_tracker_format_subtests def test_shared_memory_untracking(self): # gh-82300: When a separate Python process accesses shared memory # with track=False, it must not cause the memory to be deleted @@ -4939,6 +4956,7 @@ def test_shared_memory_untracking(self): mem.close() @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + @resource_tracker_format_subtests def test_shared_memory_tracking(self): # gh-82300: When a separate Python process accesses shared memory # with track=True, it must cause the memory to be deleted when @@ -6956,28 +6974,13 @@ def test_std_streams_flushed_after_preload(self): if multiprocessing.get_start_method() != "forkserver": self.skipTest("forkserver specific test") - # Create a test module in the temporary directory on the child's path - # TODO: This can all be simplified once gh-126631 is fixed and we can - # use __main__ instead of a module. - dirname = os.path.join(self._temp_dir, 'preloaded_module') - init_name = os.path.join(dirname, '__init__.py') - os.mkdir(dirname) - with open(init_name, "w") as f: - cmd = '''if 1: - import sys - print('stderr', end='', file=sys.stderr) - print('stdout', end='', file=sys.stdout) - ''' - f.write(cmd) - name = os.path.join(os.path.dirname(__file__), 'mp_preload_flush.py') - env = {'PYTHONPATH': self._temp_dir} - _, out, err = test.support.script_helper.assert_python_ok(name, **env) + _, out, err = test.support.script_helper.assert_python_ok(name) # Check stderr first, as it is more likely to be useful to see in the # event of a failure. - self.assertEqual(err.decode().rstrip(), 'stderr') - self.assertEqual(out.decode().rstrip(), 'stdout') + self.assertEqual(err.decode().rstrip(), '__main____mp_main__') + self.assertEqual(out.decode().rstrip(), '__main____mp_main__') class MiscTestCase(unittest.TestCase): @@ -7367,13 +7370,18 @@ def test_forkpty(self): @unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory") class TestSharedMemoryNames(unittest.TestCase): - def test_that_shared_memory_name_with_colons_has_no_resource_tracker_errors(self): + @subTests('use_simple_format', (True, False)) + def test_that_shared_memory_name_with_colons_has_no_resource_tracker_errors( + self, use_simple_format): # Test script that creates and cleans up shared memory with colon in name test_script = textwrap.dedent(""" import sys from multiprocessing import shared_memory + from multiprocessing import resource_tracker import time + resource_tracker._resource_tracker._use_simple_format = %s + # Test various patterns of colons in names test_names = [ "a:b", @@ -7401,7 +7409,7 @@ def test_that_shared_memory_name_with_colons_has_no_resource_tracker_errors(self sys.exit(1) print("SUCCESS") - """) + """ % use_simple_format) rc, out, err = assert_python_ok("-c", test_script) self.assertIn(b"SUCCESS", out) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 7df27206206..ace56aab7ac 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -6300,21 +6300,21 @@ def test_vilnius_1941_fromutc(self): gdt = datetime(1941, 6, 23, 20, 59, 59, tzinfo=timezone.utc) ldt = gdt.astimezone(Vilnius) - self.assertEqual(ldt.strftime("%c %Z%z"), + self.assertEqual(ldt.strftime("%a %b %d %H:%M:%S %Y %Z%z"), 'Mon Jun 23 23:59:59 1941 MSK+0300') self.assertEqual(ldt.fold, 0) 
         self.assertFalse(ldt.dst())
 
         gdt = datetime(1941, 6, 23, 21, tzinfo=timezone.utc)
         ldt = gdt.astimezone(Vilnius)
-        self.assertEqual(ldt.strftime("%c %Z%z"),
+        self.assertEqual(ldt.strftime("%a %b %d %H:%M:%S %Y %Z%z"),
                          'Mon Jun 23 23:00:00 1941 CEST+0200')
         self.assertEqual(ldt.fold, 1)
         self.assertTrue(ldt.dst())
 
         gdt = datetime(1941, 6, 23, 22, tzinfo=timezone.utc)
         ldt = gdt.astimezone(Vilnius)
-        self.assertEqual(ldt.strftime("%c %Z%z"),
+        self.assertEqual(ldt.strftime("%a %b %d %H:%M:%S %Y %Z%z"),
                          'Tue Jun 24 00:00:00 1941 CEST+0200')
         self.assertEqual(ldt.fold, 0)
         self.assertTrue(ldt.dst())
 
@@ -6324,22 +6324,22 @@ def test_vilnius_1941_toutc(self):
         ldt = datetime(1941, 6, 23, 22, 59, 59, tzinfo=Vilnius)
         gdt = ldt.astimezone(timezone.utc)
-        self.assertEqual(gdt.strftime("%c %Z"),
+        self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"),
                          'Mon Jun 23 19:59:59 1941 UTC')
 
         ldt = datetime(1941, 6, 23, 23, 59, 59, tzinfo=Vilnius)
         gdt = ldt.astimezone(timezone.utc)
-        self.assertEqual(gdt.strftime("%c %Z"),
+        self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"),
                          'Mon Jun 23 20:59:59 1941 UTC')
 
         ldt = datetime(1941, 6, 23, 23, 59, 59, tzinfo=Vilnius, fold=1)
         gdt = ldt.astimezone(timezone.utc)
-        self.assertEqual(gdt.strftime("%c %Z"),
+        self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"),
                          'Mon Jun 23 21:59:59 1941 UTC')
 
         ldt = datetime(1941, 6, 24, 0, tzinfo=Vilnius)
         gdt = ldt.astimezone(timezone.utc)
-        self.assertEqual(gdt.strftime("%c %Z"),
+        self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"),
                          'Mon Jun 23 22:00:00 1941 UTC')
 
     def test_constructors(self):
diff --git a/Lib/test/mp_preload_flush.py b/Lib/test/mp_preload_flush.py
index 3501554d366..c195a9ef6b2 100644
--- a/Lib/test/mp_preload_flush.py
+++ b/Lib/test/mp_preload_flush.py
@@ -1,15 +1,11 @@
 import multiprocessing
 import sys
 
-modname = 'preloaded_module'
+print(__name__, end='', file=sys.stderr)
+print(__name__, end='', file=sys.stdout)
 if __name__ == '__main__':
-    if modname in sys.modules:
-        raise AssertionError(f'{modname!r} is not in sys.modules')
     multiprocessing.set_start_method('forkserver')
-    multiprocessing.set_forkserver_preload([modname])
     for _ in range(2):
         p = multiprocessing.Process()
         p.start()
         p.join()
-elif modname not in sys.modules:
-    raise AssertionError(f'{modname!r} is not in sys.modules')
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index e3663e44546..4e3468bfcde 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -74,6 +74,15 @@ def count_opcode(code, pickle):
 def identity(x):
     return x
 
+def itersize(start, stop):
+    # Produce a geometrically increasing sequence from start to stop
+    # (inclusive) for tests.
+    size = start
+    while size < stop:
+        yield size
+        size <<= 1
+    yield stop
+
 
 class UnseekableIO(io.BytesIO):
     def peek(self, *args):
@@ -853,9 +862,8 @@ def assert_is_copy(self, obj, objcopy, msg=None):
                 self.assertEqual(getattr(obj, slot, None),
                                  getattr(objcopy, slot, None), msg=msg)
 
-    def check_unpickling_error(self, errors, data):
-        with self.subTest(data=data), \
-             self.assertRaises(errors):
+    def check_unpickling_error_strict(self, errors, data):
+        with self.assertRaises(errors):
             try:
                 self.loads(data)
             except BaseException as exc:
@@ -864,6 +872,10 @@ def check_unpickling_error(self, errors, data):
                     (data, exc.__class__.__name__, exc))
                 raise
 
+    def check_unpickling_error(self, errors, data):
+        with self.subTest(data=data):
+            self.check_unpickling_error_strict(errors, data)
+
     def test_load_from_data0(self):
         self.assert_is_copy(self._testdata, self.loads(DATA0))
 
@@ -1150,6 +1162,155 @@ def test_negative_32b_binput(self):
         dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.'
         self.check_unpickling_error(ValueError, dumped)
 
+    def test_too_large_put(self):
+        # Test that PUT with large id does not cause allocation of
+        # too large memo table. The C implementation uses a dict-based memo
+        # for sparse indices (when idx > memo_len * 2) instead of allocating
+        # a massive array. This test verifies large sparse indices work without
+        # causing memory exhaustion.
+        #
+        # The following simple pickle creates an empty list, memoizes it
+        # using a large index, then loads it back on the stack, builds
+        # a tuple containing 2 identical empty lists and returns it.
+        data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
+                          b'g' + str(n).encode() + b'\nt.')
+        #  0: ( MARK
+        #  1: ( MARK
+        #  2: l LIST (MARK at 1)
+        #  3: p PUT 1000000000000
+        # 18: g GET 1000000000000
+        # 33: t TUPLE (MARK at 0)
+        # 34: . STOP
+        for idx in [10**6, 10**9, 10**12]:
+            if idx > sys.maxsize:
+                continue
+            self.assertEqual(self.loads(data(idx)), ([],)*2)
+
+    def test_too_large_long_binput(self):
+        # Test that LONG_BINPUT with large id does not cause allocation of
+        # too large memo table. The C implementation uses a dict-based memo
+        # for sparse indices (when idx > memo_len * 2) instead of allocating
+        # a massive array. This test verifies large sparse indices work without
+        # causing memory exhaustion.
+        #
+        # The following simple pickle creates an empty list, memoizes it
+        # using a large index, then loads it back on the stack, builds
+        # a tuple containing 2 identical empty lists and returns it.
+        data = lambda n: (b'(]r' + struct.pack('<I', n) +
+                          b'j' + struct.pack('<I', n) + b't.')
+        for idx in [10**6, 10**9, 10**12]:
+            if idx > 2**32 - 1:
+                continue
+            self.assertEqual(self.loads(data(idx)), ([],)*2)
+
+    def test_truncated_large_binunicode8(self):
+        data = lambda size: b'\x8d' + struct.pack('<Q', size)
+        # An OSError is raised if the IO object does not use a file descriptor.
+ # gh-141570: Check OSError is caught and handled + with unittest.mock.patch("os.isatty", side_effect=ZeroDivisionError): + file = unittest.mock.MagicMock() + file.fileno.side_effect = OSError + file.isatty.return_value = True + self.assertEqual(_colorize.can_colorize(file=file), True) + file.isatty.return_value = False + self.assertEqual(_colorize.can_colorize(file=file), False) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 3a8be68a546..ab5382e41e7 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -581,13 +581,22 @@ class TestOptionalsShortLong(ParserTestCase): class TestOptionalsDest(ParserTestCase): """Tests various means of setting destination""" - argument_signatures = [Sig('--foo-bar'), Sig('--baz', dest='zabbaz')] + argument_signatures = [ + Sig('-x', '-foobar', '--foo-bar', '-barfoo', '-X'), + Sig('--baz', dest='zabbaz'), + Sig('-y', '-qux', '-Y'), + Sig('-z'), + ] failures = ['a'] successes = [ - ('--foo-bar f', NS(foo_bar='f', zabbaz=None)), - ('--baz g', NS(foo_bar=None, zabbaz='g')), - ('--foo-bar h --baz i', NS(foo_bar='h', zabbaz='i')), - ('--baz j --foo-bar k', NS(foo_bar='k', zabbaz='j')), + ('--foo-bar f', NS(foo_bar='f', zabbaz=None, qux=None, z=None)), + ('-x f', NS(foo_bar='f', zabbaz=None, qux=None, z=None)), + ('--baz g', NS(foo_bar=None, zabbaz='g', qux=None, z=None)), + ('--foo-bar h --baz i', NS(foo_bar='h', zabbaz='i', qux=None, z=None)), + ('--baz j --foo-bar k', NS(foo_bar='k', zabbaz='j', qux=None, z=None)), + ('-qux l', NS(foo_bar=None, zabbaz=None, qux='l', z=None)), + ('-y l', NS(foo_bar=None, zabbaz=None, qux='l', z=None)), + ('-z m', NS(foo_bar=None, zabbaz=None, qux=None, z='m')), ] @@ -796,6 +805,76 @@ def test_invalid_name(self): self.assertEqual(str(cm.exception), "invalid option name '--no-foo' for BooleanOptionalAction") +class TestBooleanOptionalActionSingleDash(ParserTestCase): + """Tests BooleanOptionalAction with single dash""" + + argument_signatures = [ + Sig('-foo', '-x', action=argparse.BooleanOptionalAction), + ] + failures = ['--foo', '--no-foo', '-no-foo', '-no-x', '-nox'] + successes = [ + ('', NS(foo=None)), + ('-foo', NS(foo=True)), + ('-nofoo', NS(foo=False)), + ('-x', NS(foo=True)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser() + with self.assertRaises(ValueError) as cm: + parser.add_argument('-nofoo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '-nofoo' for BooleanOptionalAction") + +class TestBooleanOptionalActionAlternatePrefixChars(ParserTestCase): + """Tests BooleanOptionalAction with custom prefixes""" + + parser_signature = Sig(prefix_chars='+-', add_help=False) + argument_signatures = [Sig('++foo', action=argparse.BooleanOptionalAction)] + failures = ['--foo', '--no-foo'] + successes = [ + ('', NS(foo=None)), + ('++foo', NS(foo=True)), + ('++no-foo', NS(foo=False)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser(prefix_chars='+/') + with self.assertRaisesRegex(ValueError, + 'BooleanOptionalAction.*is not valid for positional arguments'): + parser.add_argument('--foo', action=argparse.BooleanOptionalAction) + with self.assertRaises(ValueError) as cm: + parser.add_argument('++no-foo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '++no-foo' for BooleanOptionalAction") + +class TestBooleanOptionalActionSingleAlternatePrefixChar(ParserTestCase): + """Tests 
BooleanOptionalAction with single alternate prefix char""" + + parser_signature = Sig(prefix_chars='+/', add_help=False) + argument_signatures = [ + Sig('+foo', '+x', action=argparse.BooleanOptionalAction), + ] + failures = ['++foo', '++no-foo', '++nofoo', + '-no-foo', '-nofoo', '+no-foo', '-nofoo', + '+no-x', '+nox', '-no-x', '-nox'] + successes = [ + ('', NS(foo=None)), + ('+foo', NS(foo=True)), + ('+nofoo', NS(foo=False)), + ('+x', NS(foo=True)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser(prefix_chars='+/') + with self.assertRaisesRegex(ValueError, + 'BooleanOptionalAction.*is not valid for positional arguments'): + parser.add_argument('-foo', action=argparse.BooleanOptionalAction) + with self.assertRaises(ValueError) as cm: + parser.add_argument('+nofoo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '+nofoo' for BooleanOptionalAction") + class TestBooleanOptionalActionRequired(ParserTestCase): """Tests BooleanOptionalAction required""" @@ -2691,6 +2770,16 @@ def test_optional_subparsers(self): ret = parser.parse_args(()) self.assertIsNone(ret.command) + def test_subparser_help_with_parent_required_optional(self): + parser = ErrorRaisingArgumentParser(prog='PROG') + parser.add_argument('--foo', required=True) + parser.add_argument('--bar') + subparsers = parser.add_subparsers() + parser_sub = subparsers.add_parser('sub') + parser_sub.add_argument('arg') + self.assertEqual(parser_sub.format_usage(), + 'usage: PROG --foo FOO sub [-h] arg\n') + def test_help(self): self.assertEqual(self.parser.format_usage(), 'usage: PROG [-h] [--foo] bar {1,2,3} ...\n') @@ -3309,12 +3398,11 @@ def test_help_subparser_all_mutually_exclusive_group_members_suppressed(self): ''' self.assertEqual(cmd_foo.format_help(), textwrap.dedent(expected)) - def test_empty_group(self): + def test_usage_empty_group(self): # See issue 26952 - parser = argparse.ArgumentParser() + parser = ErrorRaisingArgumentParser(prog='PROG') group = parser.add_mutually_exclusive_group() - with self.assertRaises(ValueError): - parser.parse_args(['-h']) + self.assertEqual(parser.format_usage(), 'usage: PROG [-h]\n') def test_nested_mutex_groups(self): parser = argparse.ArgumentParser(prog='PROG') @@ -3582,25 +3670,29 @@ def get_parser(self, required): group.add_argument('-b', action='store_true', help='b help') parser.add_argument('-y', action='store_true', help='y help') group.add_argument('-c', action='store_true', help='c help') + parser.add_argument('-z', action='store_true', help='z help') return parser failures = ['-a -b', '-b -c', '-a -c', '-a -b -c'] successes = [ - ('-a', NS(a=True, b=False, c=False, x=False, y=False)), - ('-b', NS(a=False, b=True, c=False, x=False, y=False)), - ('-c', NS(a=False, b=False, c=True, x=False, y=False)), - ('-a -x', NS(a=True, b=False, c=False, x=True, y=False)), - ('-y -b', NS(a=False, b=True, c=False, x=False, y=True)), - ('-x -y -c', NS(a=False, b=False, c=True, x=True, y=True)), + ('-a', NS(a=True, b=False, c=False, x=False, y=False, z=False)), + ('-b', NS(a=False, b=True, c=False, x=False, y=False, z=False)), + ('-c', NS(a=False, b=False, c=True, x=False, y=False, z=False)), + ('-a -x', NS(a=True, b=False, c=False, x=True, y=False, z=False)), + ('-y -b', NS(a=False, b=True, c=False, x=False, y=True, z=False)), + ('-x -y -c', NS(a=False, b=False, c=True, x=True, y=True, z=False)), ] successes_when_not_required = [ - ('', NS(a=False, b=False, c=False, x=False, y=False)), - ('-x', NS(a=False, b=False, c=False, 
x=True, y=False)), - ('-y', NS(a=False, b=False, c=False, x=False, y=True)), + ('', NS(a=False, b=False, c=False, x=False, y=False, z=False)), + ('-x', NS(a=False, b=False, c=False, x=True, y=False, z=False)), + ('-y', NS(a=False, b=False, c=False, x=False, y=True, z=False)), ] - usage_when_required = usage_when_not_required = '''\ - usage: PROG [-h] [-x] [-a] [-b] [-y] [-c] + usage_when_not_required = '''\ + usage: PROG [-h] [-x] [-a | -b | -c] [-y] [-z] + ''' + usage_when_required = '''\ + usage: PROG [-h] [-x] (-a | -b | -c) [-y] [-z] ''' help = '''\ @@ -3611,6 +3703,7 @@ def get_parser(self, required): -b b help -y y help -c c help + -z z help ''' @@ -3664,23 +3757,27 @@ def get_parser(self, required): group.add_argument('a', nargs='?', help='a help') group.add_argument('-b', action='store_true', help='b help') group.add_argument('-c', action='store_true', help='c help') + parser.add_argument('-z', action='store_true', help='z help') return parser failures = ['X A -b', '-b -c', '-c X A'] successes = [ - ('X A', NS(a='A', b=False, c=False, x='X', y=False)), - ('X -b', NS(a=None, b=True, c=False, x='X', y=False)), - ('X -c', NS(a=None, b=False, c=True, x='X', y=False)), - ('X A -y', NS(a='A', b=False, c=False, x='X', y=True)), - ('X -y -b', NS(a=None, b=True, c=False, x='X', y=True)), + ('X A', NS(a='A', b=False, c=False, x='X', y=False, z=False)), + ('X -b', NS(a=None, b=True, c=False, x='X', y=False, z=False)), + ('X -c', NS(a=None, b=False, c=True, x='X', y=False, z=False)), + ('X A -y', NS(a='A', b=False, c=False, x='X', y=True, z=False)), + ('X -y -b', NS(a=None, b=True, c=False, x='X', y=True, z=False)), ] successes_when_not_required = [ - ('X', NS(a=None, b=False, c=False, x='X', y=False)), - ('X -y', NS(a=None, b=False, c=False, x='X', y=True)), + ('X', NS(a=None, b=False, c=False, x='X', y=False, z=False)), + ('X -y', NS(a=None, b=False, c=False, x='X', y=True, z=False)), ] - usage_when_required = usage_when_not_required = '''\ - usage: PROG [-h] [-y] [-b] [-c] x [a] + usage_when_not_required = '''\ + usage: PROG [-h] [-y] [-z] x [-b | -c | a] + ''' + usage_when_required = '''\ + usage: PROG [-h] [-y] [-z] x (-b | -c | a) ''' help = '''\ @@ -3693,6 +3790,7 @@ def get_parser(self, required): -y y help -b b help -c c help + -z z help ''' @@ -4887,6 +4985,25 @@ def test_long_mutex_groups_wrap(self): ''') self.assertEqual(parser.format_usage(), usage) + def test_mutex_groups_with_mixed_optionals_positionals_wrap(self): + # https://github.com/python/cpython/issues/75949 + # Mutually exclusive groups containing both optionals and positionals + # should preserve pipe separators when the usage line wraps. 
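+        # Each alternative should keep its "|" separator on the wrapped
+        # usage lines instead of collapsing into separate bracket groups.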
+ parser = argparse.ArgumentParser(prog='PROG') + g = parser.add_mutually_exclusive_group() + g.add_argument('-v', '--verbose', action='store_true') + g.add_argument('-q', '--quiet', action='store_true') + g.add_argument('-x', '--extra-long-option-name', nargs='?') + g.add_argument('-y', '--yet-another-long-option', nargs='?') + g.add_argument('positional', nargs='?') + + usage = textwrap.dedent('''\ + usage: PROG [-h] + [-v | -q | -x [EXTRA_LONG_OPTION_NAME] | + -y [YET_ANOTHER_LONG_OPTION] | positional] + ''') + self.assertEqual(parser.format_usage(), usage) + class TestHelpVariableExpansion(HelpTestCase): """Test that variables are expanded properly in help messages""" @@ -5577,6 +5694,11 @@ def custom_formatter(prog): a-very-long-command command that does something ''')) + def test_direct_formatter_instantiation(self): + formatter = argparse.HelpFormatter(prog="program") + formatter.add_usage(usage=None, actions=[], groups=[]) + help_text = formatter.format_help() + self.assertEqual(help_text, "usage: program\n") # ===================================== # Optional/Positional constructor tests @@ -5611,6 +5733,8 @@ def test_invalid_option_strings(self): self.assertTypeError('-', errmsg='dest= is required') self.assertTypeError('--', errmsg='dest= is required') self.assertTypeError('---', errmsg='dest= is required') + self.assertTypeError('-', '--', '---', + errmsg="dest= is required for options like '-', '--', '---'") def test_invalid_prefix(self): self.assertValueError('--foo', '+foo', @@ -7207,6 +7331,8 @@ def test_argparse_color(self): short_b = self.theme.short_option label_b = self.theme.label pos_b = self.theme.action + default = self.theme.default + default_value = self.theme.default_value reset = self.theme.reset # Act @@ -7233,17 +7359,17 @@ def test_argparse_color(self): {heading}options:{reset} {short_b}-h{reset}, {long_b}--help{reset} show this help message and exit - {short_b}-v{reset}, {long_b}--verbose{reset} more spam (default: False) - {short_b}-q{reset}, {long_b}--quiet{reset} less spam (default: False) + {short_b}-v{reset}, {long_b}--verbose{reset} more spam {default}(default: {default_value}False{default}){reset} + {short_b}-q{reset}, {long_b}--quiet{reset} less spam {default}(default: {default_value}False{default}){reset} {short_b}-o{reset}, {long_b}--optional1{reset} {long_b}--optional2{reset} {label_b}OPTIONAL2{reset} - pick one (default: None) + pick one {default}(default: {default_value}None{default}){reset} {long_b}--optional3{reset} {label_b}{{X,Y,Z}}{reset} - {long_b}--optional4{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) - {long_b}--optional5{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) - {long_b}--optional6{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) + {long_b}--optional4{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} + {long_b}--optional5{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} + {long_b}--optional6{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} {short_b}-p{reset}, {long_b}--optional7{reset} {label_b}{{Aaaaa,Bbbbb,Ccccc,Ddddd}}{reset} - pick one (default: None) + pick one {default}(default: {default_value}None{default}){reset} {short_b}+f{reset} {label_b}F{reset} {long_b}++bar{reset} {label_b}BAR{reset} {long_b}-+baz{reset} {label_b}BAZ{reset} @@ -7259,7 +7385,28 @@ def test_argparse_color(self): ), ) - def test_argparse_color_usage(self): + def 
test_argparse_color_mutually_exclusive_group_usage(self): + parser = argparse.ArgumentParser(color=True, prog="PROG") + group = parser.add_mutually_exclusive_group() + group.add_argument('--foo', action='store_true', help='FOO') + group.add_argument('--spam', help='SPAM') + group.add_argument('badger', nargs='*', help='BADGER') + + prog = self.theme.prog + heading = self.theme.heading + long = self.theme.summary_long_option + short = self.theme.summary_short_option + label = self.theme.summary_label + pos = self.theme.summary_action + reset = self.theme.reset + + self.assertEqual(parser.format_usage(), + f"{heading}usage: {reset}{prog}PROG{reset} [{short}-h{reset}] " + f"[{long}--foo{reset} | " + f"{long}--spam {label}SPAM{reset} | " + f"{pos}badger ...{reset}]\n") + + def test_argparse_color_custom_usage(self): # Arrange parser = argparse.ArgumentParser( add_help=False, @@ -7411,6 +7558,40 @@ def test_error_and_warning_not_colorized_when_disabled(self): self.assertNotIn('\x1b[', warn) self.assertIn('warning:', warn) + def test_print_help_uses_target_file_for_color_decision(self): + parser = argparse.ArgumentParser(prog='PROG', color=True) + parser.add_argument('--opt') + output = io.StringIO() + calls = [] + + def fake_can_colorize(*, file=None): + calls.append(file) + return file is None + + with swap_attr(_colorize, 'can_colorize', fake_can_colorize): + parser.print_help(file=output) + + self.assertIs(calls[-1], output) + self.assertIn(output, calls) + self.assertNotIn('\x1b[', output.getvalue()) + + def test_print_usage_uses_target_file_for_color_decision(self): + parser = argparse.ArgumentParser(prog='PROG', color=True) + parser.add_argument('--opt') + output = io.StringIO() + calls = [] + + def fake_can_colorize(*, file=None): + calls.append(file) + return file is None + + with swap_attr(_colorize, 'can_colorize', fake_can_colorize): + parser.print_usage(file=output) + + self.assertIs(calls[-1], output) + self.assertIn(output, calls) + self.assertNotIn('\x1b[', output.getvalue()) + class TestModule(unittest.TestCase): def test_deprecated__version__(self): diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 608ffdfad12..d2b76b46dbe 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -992,7 +992,8 @@ def next(self): @skip_wasi_stack_overflow() @skip_emscripten_stack_overflow() def test_ast_recursion_limit(self): - crash_depth = 500_000 + # Android test devices have less memory. 
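+        # 100_000 is still deep enough to trip the recursion guard the
+        # test expects, while staying within the devices' memory budget.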
+ crash_depth = 100_000 if sys.platform == "android" else 500_000 success_depth = 200 if _testinternalcapi is not None: remaining = _testinternalcapi.get_c_recursion_remaining() diff --git a/Lib/test/test_asyncio/test_tasks.py b/Lib/test/test_asyncio/test_tasks.py index 931a43816a2..9809621a324 100644 --- a/Lib/test/test_asyncio/test_tasks.py +++ b/Lib/test/test_asyncio/test_tasks.py @@ -3680,6 +3680,30 @@ def task_factory(loop, coro): (loop, context), kwargs = callback.call_args self.assertEqual(context['exception'], exc_context.exception) + def test_run_coroutine_threadsafe_and_cancel(self): + task = None + thread_future = None + # Use a custom task factory to capture the created Task + def task_factory(loop, coro): + nonlocal task + task = asyncio.Task(coro, loop=loop) + return task + + self.addCleanup(self.loop.set_task_factory, + self.loop.get_task_factory()) + + async def target(): + nonlocal thread_future + self.loop.set_task_factory(task_factory) + thread_future = asyncio.run_coroutine_threadsafe(asyncio.sleep(10), self.loop) + await asyncio.sleep(0) + + thread_future.cancel() + + self.loop.run_until_complete(target()) + self.assertTrue(task.cancelled()) + self.assertTrue(thread_future.cancelled()) + class SleepTests(test_utils.TestCase): def setUp(self): diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index 86898bfcab9..a55ec6cf3b8 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -549,6 +549,17 @@ def test_hex_separator_basics(self): self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef') self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') self.assertEqual(three_bytes.hex('*', -2), 'b901*ef') + self.assertEqual(three_bytes.hex(sep=':', bytes_per_sep=2), 'b9:01ef') + self.assertEqual(three_bytes.hex(sep='*', bytes_per_sep=-2), 'b901*ef') + for bytes_per_sep in 3, -3, 2**31-1, -(2**31-1): + with self.subTest(bytes_per_sep=bytes_per_sep): + self.assertEqual(three_bytes.hex(':', bytes_per_sep), 'b901ef') + for bytes_per_sep in 2**31, -2**31, 2**1000, -2**1000: + with self.subTest(bytes_per_sep=bytes_per_sep): + try: + self.assertEqual(three_bytes.hex(':', bytes_per_sep), 'b901ef') + except OverflowError: + pass value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030') @@ -1524,6 +1535,32 @@ def test_take_bytes(self): self.assertRaises(BufferError, ba.take_bytes) self.assertEqual(ba.take_bytes(), b'abc') + @support.cpython_only # tests an implementation detail + def test_take_bytes_optimization(self): + # Validate optimization around taking lots of little chunks out of a + # much bigger buffer. Save work by only copying a little rather than + # moving a lot. + ba = bytearray(b'abcdef' + b'0' * 1000) + start_alloc = ba.__alloc__() + + # Take two bytes at a time, checking alloc doesn't change. + self.assertEqual(ba.take_bytes(2), b'ab') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 4 + 1000) + self.assertEqual(ba.take_bytes(2), b'cd') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 2 + 1000) + self.assertEqual(ba.take_bytes(2), b'ef') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 0 + 1000) + self.assertEqual(ba.__alloc__(), start_alloc) + + # Take over half, alloc shrinks to exact size. 
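+        # Taking 501 of the remaining 1000 bytes crosses the halfway mark,
+        # so the buffer is reallocated to fit the 499 leftover bytes
+        # exactly instead of shifting them inside the old allocation.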
+        self.assertEqual(ba.take_bytes(501), b'0' * 501)
+        self.assertEqual(len(ba), 499)
+        bytes_header_size = sys.getsizeof(b'')
+        self.assertEqual(ba.__alloc__(), 499 + bytes_header_size)
+
     def test_setitem(self):
         def setitem_as_mapping(b, i, val):
             b[i] = val
@@ -2629,6 +2666,10 @@ def zfill(b, a):
             c = a.zfill(0x400000)
             assert not c or c[-1] not in (0xdd, 0xcd)
 
+        def resize(b, a):  # MODIFIES!
+            b.wait()
+            a.resize(10)
+
         def take_bytes(b, a):  # MODIFIES!
             b.wait()
             c = a.take_bytes()
@@ -2702,6 +2743,8 @@ def check(funcs, a=None, *args):
         check([clear] + [startswith] * 10)
         check([clear] + [strip] * 10)
 
+        check([clear] + [resize] * 10)
+        check([clear] + [take_bytes] * 10)
         check([take_bytes_n] * 10, bytearray(b'0123456789' * 0x400))
         check([take_bytes_n] * 10, bytearray(b'0123456789' * 5))
 
diff --git a/Lib/test/test_capi/test_module.py b/Lib/test/test_capi/test_module.py
index 7ec23e637d7..823e2ab6b2e 100644
--- a/Lib/test/test_capi/test_module.py
+++ b/Lib/test/test_capi/test_module.py
@@ -3,7 +3,7 @@
 import unittest
 import types
 
-from test.support import import_helper, subTests
+from test.support import import_helper, subTests, requires_gil_enabled
 
 # Skip this test if the _testcapi module isn't available.
 _testcapi = import_helper.import_module('_testcapi')
@@ -25,6 +25,7 @@ def def_and_token(mod):
 )
 class TestModFromSlotsAndSpec(unittest.TestCase):
 
+    @requires_gil_enabled("empty slots re-enable GIL")
     def test_empty(self):
         mod = _testcapi.module_from_slots_empty(FakeSpec())
         self.assertIsInstance(mod, types.ModuleType)
diff --git a/Lib/test/test_capi/test_object.py b/Lib/test/test_capi/test_object.py
index d4056727d07..c5040913e9e 100644
--- a/Lib/test/test_capi/test_object.py
+++ b/Lib/test/test_capi/test_object.py
@@ -1,4 +1,5 @@
 import enum
+import os
 import sys
 import textwrap
 import unittest
@@ -13,6 +14,9 @@
 _testcapi = import_helper.import_module('_testcapi')
 _testinternalcapi = import_helper.import_module('_testinternalcapi')
 
+NULL = None
+STDERR_FD = 2
+
 
 class Constant(enum.IntEnum):
     Py_CONSTANT_NONE = 0
@@ -247,5 +251,53 @@ def func(x):
         func(object())
 
 
+    def pyobject_dump(self, obj, release_gil=False):
+        pyobject_dump = _testcapi.pyobject_dump
+
+        try:
+            old_stderr = os.dup(STDERR_FD)
+        except OSError as exc:
+            # os.dup(STDERR_FD) is not supported on WASI
+            self.skipTest(f"os.dup() failed with {exc!r}")
+
+        filename = os_helper.TESTFN
+        try:
+            try:
+                with open(filename, "wb") as fp:
+                    fd = fp.fileno()
+                    os.dup2(fd, STDERR_FD)
+                    pyobject_dump(obj, release_gil)
+            finally:
+                os.dup2(old_stderr, STDERR_FD)
+                os.close(old_stderr)
+
+            with open(filename) as fp:
+                return fp.read().rstrip()
+        finally:
+            os_helper.unlink(filename)
+
+    def test_pyobject_dump(self):
+        # test string object
+        str_obj = 'test string'
+        output = self.pyobject_dump(str_obj)
+        hex_regex = r'(0x)?[0-9a-fA-F]+'
+        regex = (
+            fr"object address  : {hex_regex}\n"
+            r"object refcount : [0-9]+\n"
+            fr"object type     : {hex_regex}\n"
+            r"object type name: str\n"
+            r"object repr     : 'test string'"
+        )
+        self.assertRegex(output, regex)
+
+        # release the GIL
+        output = self.pyobject_dump(str_obj, release_gil=True)
+        self.assertRegex(output, regex)
+
+        # test NULL object
+        output = self.pyobject_dump(NULL)
+        self.assertRegex(output, r'<object at .* is freed>')
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 25372fee58e..51234a2e40f 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -2480,8 +2480,6 @@ def testfunc(n):
             testfunc(_testinternalcapi.TIER2_THRESHOLD)
 
-            ex = get_first_executor(testfunc)
-            assert ex is not None
             """))
 
     def test_pop_top_specialize_none(self):
@@ -2664,6 +2662,38 @@ def f():
                     f" {executor} at offset {idx} rather"
                     f" than expected _EXIT_TRACE")
 
+    def test_enter_executor_valid_op_arg(self):
+        script_helper.assert_python_ok("-c", textwrap.dedent("""
+        import sys
+        sys.setrecursionlimit(30)  # reduce time of the run
+
+        str_v1 = ''
+        tuple_v2 = (None, None, None, None, None)
+        small_int_v3 = 4
+
+        def f1():
+
+            for _ in range(10):
+                abs(0)
+
+            tuple_v2[small_int_v3]
+            tuple_v2[small_int_v3]
+            tuple_v2[small_int_v3]
+
+        def recursive_wrapper_4569():
+            str_v1 > str_v1
+            str_v1 > str_v1
+            str_v1 > str_v1
+            recursive_wrapper_4569()
+
+        recursive_wrapper_4569()
+
+        for i_f1 in range(19000):
+            try:
+                f1()
+            except RecursionError:
+                pass
+        """))
 
 
 def global_identity(x):
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index 30f21875b22..fa611f480d6 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -728,7 +728,8 @@ def test_yet_more_evil_still_undecodable(self):
     def test_compiler_recursion_limit(self):
         # Compiler frames are small
         limit = 100
-        crash_depth = limit * 5000
+        # Android test devices have less memory.
+        crash_depth = limit * (1000 if sys.platform == "android" else 5000)
         success_depth = limit
 
         def check_limit(prefix, repeated, mode="single"):
@@ -1036,11 +1037,13 @@ def test_path_like_objects(self):
         # An implicit test for PyUnicode_FSDecoder().
         compile("42", FakePath("test_compile_pathlike"), "single")
 
+    # bpo-31113: Stack overflow when compile a long sequence of
+    # complex statements.
     @support.requires_resource('cpu')
     def test_stack_overflow(self):
-        # bpo-31113: Stack overflow when compile a long sequence of
-        # complex statements.
-        compile("if a: b\n" * 200000, "<string>", "exec")
+        # Android test devices have less memory.
+        size = 100_000 if sys.platform == "android" else 200_000
+        compile("if a: b\n" * size, "<string>", "exec")
 
     # Multiple users rely on the fact that CPython does not generate
     # bytecode for dead code blocks.  See bpo-37500 for more context.
diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py
index 467ec09d99e..cfef24727e8 100644
--- a/Lib/test/test_copy.py
+++ b/Lib/test/test_copy.py
@@ -672,7 +672,7 @@ def __eq__(self, other):
     def test_reduce_5tuple(self):
         class C(dict):
             def __reduce__(self):
-                return (C, (), self.__dict__, None, self.items())
+                return (C, (), self.__dict__, None, iter(self.items()))
             def __eq__(self, other):
                 return (dict(self) == dict(other) and
                         self.__dict__ == other.__dict__)
diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py
index 513dd78c438..3b335429b98 100644
--- a/Lib/test/test_dataclasses/__init__.py
+++ b/Lib/test/test_dataclasses/__init__.py
@@ -927,6 +927,20 @@ class C:
 
         validate_class(C)
 
+    def test_incomplete_annotations(self):
+        # gh-142214
+        @dataclass
+        class C:
+            "doc"  # needed because otherwise we fetch the annotations at the wrong time
+            x: int
+
+        C.__annotate__ = lambda _: {}
+
+        self.assertEqual(
+            annotationlib.get_annotations(C.__init__),
+            {"return": None}
+        )
+
     def test_missing_default(self):
         # Test that MISSING works the same as a default not being
         # specified.
@@ -2578,6 +2592,20 @@ def __init__(self, x: int) -> None:
 
         self.assertFalse(hasattr(E.__init__.__annotate__,
                                  "__generated_by_dataclasses__"))
 
+    def test_slots_true_init_false(self):
+        # Test that slots=True and init=False work together and
+        # that __annotate__ is not added to __init__.
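+        # With init=False no __init__ is generated, so the class keeps
+        # object.__init__ and no dataclass __annotate__ should appear on it.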
+ + @dataclass(slots=True, init=False) + class F: + x: int + + f = F() + f.x = 10 + self.assertEqual(f.x, 10) + + self.assertFalse(hasattr(F.__init__, "__annotate__")) + def test_init_false_forwardref(self): # Test forward references in fields not required for __init__ annotations. diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 14f94285d3f..82a48ad4d1a 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1329,18 +1329,17 @@ class D(object): self.assertNotHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) class W(object): __slots__ = ["__weakref__"] a = W() self.assertHasAttr(a, "__weakref__") self.assertNotHasAttr(a, "__dict__") - try: + with self.assertRaises(AttributeError): a.foo = 42 - except AttributeError: - pass - else: - self.fail("shouldn't be allowed to set a.foo") + self.assertIs(weakref.ref(a)(), a) class C1(W, D): __slots__ = [] @@ -1349,6 +1348,7 @@ class C1(W, D): self.assertHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + self.assertIs(weakref.ref(a)(), a) class C2(D, W): __slots__ = [] @@ -1357,6 +1357,77 @@ class C2(D, W): self.assertHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + self.assertIs(weakref.ref(a)(), a) + + @unittest.skipIf(_testcapi is None, 'need the _testcapi module') + def test_slots_special_before_items(self): + class D(_testcapi.HeapCCollection): + __slots__ = ["__dict__"] + a = D(1, 2, 3) + self.assertHasAttr(a, "__dict__") + self.assertNotHasAttr(a, "__weakref__") + a.foo = 42 + self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) + del a.__dict__ + self.assertNotHasAttr(a, "foo") + self.assertEqual(a.__dict__, {}) + self.assertEqual(list(a), [1, 2, 3]) + + class W(_testcapi.HeapCCollection): + __slots__ = ["__weakref__"] + a = W(1, 2, 3) + self.assertHasAttr(a, "__weakref__") + self.assertNotHasAttr(a, "__dict__") + with self.assertRaises(AttributeError): + a.foo = 42 + self.assertIs(weakref.ref(a)(), a) + + with self.assertRaises(TypeError): + class X(_testcapi.HeapCCollection): + __slots__ = ['x'] + + with self.assertRaises(TypeError): + class X(_testcapi.HeapCCollection): + __slots__ = ['__dict__', 'x'] + + @support.subTests(('base', 'arg'), [ + (tuple, (1, 2, 3)), + (int, 9876543210**2), + (bytes, b'ab'), + ]) + def test_slots_special_after_items(self, base, arg): + class D(base): + __slots__ = ["__dict__"] + a = D(arg) + self.assertHasAttr(a, "__dict__") + self.assertNotHasAttr(a, "__weakref__") + a.foo = 42 + self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) + del a.__dict__ + self.assertNotHasAttr(a, "foo") + self.assertEqual(a.__dict__, {}) + self.assertEqual(a, base(arg)) + + class W(base): + __slots__ = ["__weakref__"] + a = W(arg) + self.assertHasAttr(a, "__weakref__") + self.assertNotHasAttr(a, "__dict__") + with self.assertRaises(AttributeError): + a.foo = 42 + self.assertIs(weakref.ref(a)(), a) + self.assertEqual(a, base(arg)) + + with self.assertRaises(TypeError): + class X(base): + __slots__ = ['x'] + with self.assertRaises(TypeError): + class X(base): + __slots__ = ['__dict__', 'x'] def test_slots_special2(self): # Testing __qualname__ and __classcell__ in __slots__ diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 2e6c2bbdf19..665b3e843dd 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1621,6 +1621,14 @@ def __eq__(self, other): 
self.assertEqual(len(d), 1) + def test_split_table_update_with_str_subclass(self): + class MyStr(str): pass + class MyClass: pass + obj = MyClass() + obj.attr = 1 + obj.__dict__[MyStr('attr')] = 2 + self.assertEqual(obj.attr, 2) + class CAPITest(unittest.TestCase): diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index 0fa74407e3c..241d09db1fa 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -833,6 +833,118 @@ def test_empty_namespace_package(self): self.assertEqual(len(include_empty_finder.find(mod)), 1) self.assertEqual(len(exclude_empty_finder.find(mod)), 0) + def test_lineno_of_test_dict_strings(self): + """Test line numbers are found for __test__ dict strings.""" + module_content = '''\ +"""Module docstring.""" + +def dummy_function(): + """Dummy function docstring.""" + pass + +__test__ = { + 'test_string': """ + This is a test string. + >>> 1 + 1 + 2 + """, +} +''' + with tempfile.TemporaryDirectory() as tmpdir: + module_path = os.path.join(tmpdir, 'test_module_lineno.py') + with open(module_path, 'w') as f: + f.write(module_content) + + sys.path.insert(0, tmpdir) + try: + import test_module_lineno + finder = doctest.DocTestFinder() + tests = finder.find(test_module_lineno) + + test_dict_test = None + for test in tests: + if '__test__' in test.name: + test_dict_test = test + break + + self.assertIsNotNone( + test_dict_test, + "__test__ dict test not found" + ) + # gh-69113: line number should not be None for __test__ strings + self.assertIsNotNone( + test_dict_test.lineno, + "Line number should not be None for __test__ dict strings" + ) + self.assertGreater( + test_dict_test.lineno, + 0, + "Line number should be positive" + ) + finally: + if 'test_module_lineno' in sys.modules: + del sys.modules['test_module_lineno'] + sys.path.pop(0) + + def test_lineno_multiline_matching(self): + """Test multi-line matching when no unique line exists.""" + # gh-69113: test that line numbers are found even when lines + # appear multiple times (e.g., ">>> x = 1" in both test entries) + module_content = '''\ +"""Module docstring.""" + +__test__ = { + 'test_one': """ + >>> x = 1 + >>> x + 1 + """, + 'test_two': """ + >>> x = 1 + >>> x + 2 + """, +} +''' + with tempfile.TemporaryDirectory() as tmpdir: + module_path = os.path.join(tmpdir, 'test_module_multiline.py') + with open(module_path, 'w') as f: + f.write(module_content) + + sys.path.insert(0, tmpdir) + try: + import test_module_multiline + finder = doctest.DocTestFinder() + tests = finder.find(test_module_multiline) + + test_one = None + test_two = None + for test in tests: + if 'test_one' in test.name: + test_one = test + elif 'test_two' in test.name: + test_two = test + + self.assertIsNotNone(test_one, "test_one not found") + self.assertIsNotNone(test_two, "test_two not found") + self.assertIsNotNone( + test_one.lineno, + "Line number should not be None for test_one" + ) + self.assertIsNotNone( + test_two.lineno, + "Line number should not be None for test_two" + ) + self.assertNotEqual( + test_one.lineno, + test_two.lineno, + "test_one and test_two should have different line numbers" + ) + finally: + if 'test_module_multiline' in sys.modules: + del sys.modules['test_module_multiline'] + sys.path.pop(0) + def test_DocTestParser(): r""" Unit tests for the `DocTestParser` class. 
@@ -2434,7 +2546,8 @@ def test_DocTestSuite_errors():
     >>> print(result.failures[1][1]) # doctest: +ELLIPSIS
     Traceback (most recent call last):
-      File "...sample_doctest_errors.py", line None, in test.test_doctest.sample_doctest_errors.__test__.bad
+      File "...sample_doctest_errors.py", line 37, in test.test_doctest.sample_doctest_errors.__test__.bad
+        >...>> 2 + 2
     AssertionError: Failed example:
         2 + 2
     Expected:
@@ -2464,7 +2577,8 @@ def test_DocTestSuite_errors():
    >>> print(result.errors[1][1]) # doctest: +ELLIPSIS
    Traceback (most recent call last):
-      File "...sample_doctest_errors.py", line None, in test.test_doctest.sample_doctest_errors.__test__.bad
+      File "...sample_doctest_errors.py", line 39, in test.test_doctest.sample_doctest_errors.__test__.bad
+        >...>> 1/0
       File "<doctest test.test_doctest.sample_doctest_errors.__test__.bad[1]>", line 1, in <module>
         1/0
         ~^~
@@ -3256,7 +3370,7 @@ def test_testmod_errors(): r"""
         ~^~
     ZeroDivisionError: division by zero
     **********************************************************************
-    File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad
+    File "...sample_doctest_errors.py", line 37, in test.test_doctest.sample_doctest_errors.__test__.bad
     Failed example:
         2 + 2
     Expected:
@@ -3264,7 +3378,7 @@ def test_testmod_errors(): r"""
     Got:
         4
     **********************************************************************
-    File "...sample_doctest_errors.py", line ?, in test.test_doctest.sample_doctest_errors.__test__.bad
+    File "...sample_doctest_errors.py", line 39, in test.test_doctest.sample_doctest_errors.__test__.bad
     Failed example:
         1/0
     Exception raised:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 179e236ecdf..f7f9f9c4e2f 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -3255,5 +3255,15 @@ def test_long_filename_attachment(self):
             " filename*1*=_TEST_TES.txt\n",
         )
 
+    def test_fold_unfoldable_element_stealing_whitespace(self):
+        # gh-142006: When an element is too long to fit on the current line,
+        # the previous line's trailing whitespace should not trigger a
+        # double newline.
+        policy = self.policy.clone(max_line_length=10)
+        # The non-whitespace text needs to exactly fill the max_line_length (10).
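+        # Nine "a"s plus the comma occupy all ten columns, so the run of
+        # "b"s cannot fit there and must fold to a fresh continuation line.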
+ text = ("a" * 9) + ", " + ("b" * 20) + expected = ("a" * 9) + ",\n " + ("b" * 20) + "\n" + token = parser.get_address_list(text)[0] + self._test(token, expected, policy=policy) + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_email/test_defect_handling.py b/Lib/test/test_email/test_defect_handling.py index 44e76c8ce5e..acc4accccac 100644 --- a/Lib/test/test_email/test_defect_handling.py +++ b/Lib/test/test_email/test_defect_handling.py @@ -126,12 +126,10 @@ def test_multipart_invalid_cte(self): errors.InvalidMultipartContentTransferEncodingDefect) def test_multipart_no_cte_no_defect(self): - if self.raise_expected: return msg = self._str_msg(self.multipart_msg.format('')) self.assertEqual(len(self.get_defects(msg)), 0) def test_multipart_valid_cte_no_defect(self): - if self.raise_expected: return for cte in ('7bit', '8bit', 'BINary'): msg = self._str_msg( self.multipart_msg.format("\nContent-Transfer-Encoding: "+cte)) @@ -300,6 +298,47 @@ def test_missing_ending_boundary(self): self.assertDefectsEqual(self.get_defects(msg), [errors.CloseBoundaryNotFoundDefect]) + def test_line_beginning_colon(self): + string = ( + "Subject: Dummy subject\r\n: faulty header line\r\n\r\nbody\r\n" + ) + + with self._raise_point(errors.InvalidHeaderDefect): + msg = self._str_msg(string) + self.assertEqual(len(self.get_defects(msg)), 1) + self.assertDefectsEqual( + self.get_defects(msg), [errors.InvalidHeaderDefect] + ) + + if msg: + self.assertEqual(msg.items(), [("Subject", "Dummy subject")]) + self.assertEqual(msg.get_payload(), "body\r\n") + + def test_misplaced_envelope(self): + string = ( + "Subject: Dummy subject\r\nFrom wtf\r\nTo: abc\r\n\r\nbody\r\n" + ) + with self._raise_point(errors.MisplacedEnvelopeHeaderDefect): + msg = self._str_msg(string) + self.assertEqual(len(self.get_defects(msg)), 1) + self.assertDefectsEqual( + self.get_defects(msg), [errors.MisplacedEnvelopeHeaderDefect] + ) + + if msg: + headers = [("Subject", "Dummy subject"), ("To", "abc")] + self.assertEqual(msg.items(), headers) + self.assertEqual(msg.get_payload(), "body\r\n") + + + +class TestCompat32(TestDefectsBase, TestEmailBase): + + policy = policy.compat32 + + def get_defects(self, obj): + return obj.defects + class TestDefectDetection(TestDefectsBase, TestEmailBase): @@ -332,6 +371,9 @@ def _raise_point(self, defect): with self.assertRaises(defect): yield + def get_defects(self, obj): + return obj.defects + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 4020f1041c4..4e6c213510c 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -2263,70 +2263,6 @@ def test_parse_missing_minor_type(self): eq(msg.get_content_maintype(), 'text') eq(msg.get_content_subtype(), 'plain') - # test_defect_handling - def test_same_boundary_inner_outer(self): - msg = self._msgobj('msg_15.txt') - # XXX We can probably eventually do better - inner = msg.get_payload(0) - self.assertHasAttr(inner, 'defects') - self.assertEqual(len(inner.defects), 1) - self.assertIsInstance(inner.defects[0], - errors.StartBoundaryNotFoundDefect) - - # test_defect_handling - def test_multipart_no_boundary(self): - msg = self._msgobj('msg_25.txt') - self.assertIsInstance(msg.get_payload(), str) - self.assertEqual(len(msg.defects), 2) - self.assertIsInstance(msg.defects[0], - errors.NoBoundaryInMultipartDefect) - self.assertIsInstance(msg.defects[1], - errors.MultipartInvariantViolationDefect) - - multipart_msg = 
textwrap.dedent("""\ - Date: Wed, 14 Nov 2007 12:56:23 GMT - From: foo@bar.invalid - To: foo@bar.invalid - Subject: Content-Transfer-Encoding: base64 and multipart - MIME-Version: 1.0 - Content-Type: multipart/mixed; - boundary="===============3344438784458119861=="{} - - --===============3344438784458119861== - Content-Type: text/plain - - Test message - - --===============3344438784458119861== - Content-Type: application/octet-stream - Content-Transfer-Encoding: base64 - - YWJj - - --===============3344438784458119861==-- - """) - - # test_defect_handling - def test_multipart_invalid_cte(self): - msg = self._str_msg( - self.multipart_msg.format("\nContent-Transfer-Encoding: base64")) - self.assertEqual(len(msg.defects), 1) - self.assertIsInstance(msg.defects[0], - errors.InvalidMultipartContentTransferEncodingDefect) - - # test_defect_handling - def test_multipart_no_cte_no_defect(self): - msg = self._str_msg(self.multipart_msg.format('')) - self.assertEqual(len(msg.defects), 0) - - # test_defect_handling - def test_multipart_valid_cte_no_defect(self): - for cte in ('7bit', '8bit', 'BINary'): - msg = self._str_msg( - self.multipart_msg.format( - "\nContent-Transfer-Encoding: {}".format(cte))) - self.assertEqual(len(msg.defects), 0) - # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2. def test_invalid_content_type(self): eq = self.assertEqual @@ -2403,30 +2339,6 @@ def test_missing_start_boundary(self): self.assertIsInstance(bad.defects[0], errors.StartBoundaryNotFoundDefect) - # test_defect_handling - def test_first_line_is_continuation_header(self): - eq = self.assertEqual - m = ' Line 1\nSubject: test\n\nbody' - msg = email.message_from_string(m) - eq(msg.keys(), ['Subject']) - eq(msg.get_payload(), 'body') - eq(len(msg.defects), 1) - self.assertDefectsEqual(msg.defects, - [errors.FirstHeaderLineIsContinuationDefect]) - eq(msg.defects[0].line, ' Line 1\n') - - # test_defect_handling - def test_missing_header_body_separator(self): - # Our heuristic if we see a line that doesn't look like a header (no - # leading whitespace but no ':') is to assume that the blank line that - # separates the header from the body is missing, and to stop parsing - # headers and start parsing the body. 
- msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n') - self.assertEqual(msg.keys(), ['Subject']) - self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n') - self.assertDefectsEqual(msg.defects, - [errors.MissingHeaderBodySeparatorDefect]) - def test_string_payload_with_extra_space_after_cte(self): # https://github.com/python/cpython/issues/98188 cte = "base64 " diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 1078796eae8..b5367941227 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -241,21 +241,7 @@ def test_repeated_init_and_inittab(self): def test_create_module_from_initfunc(self): out, err = self.run_embedded_interpreter("test_create_module_from_initfunc") - if support.Py_GIL_DISABLED: - # the test imports a singlephase init extension, so it emits a warning - # under the free-threaded build - expected_runtime_warning = ( - "RuntimeWarning: The global interpreter lock (GIL)" - " has been enabled to load module 'embedded_ext'" - ) - filtered_err_lines = [ - line - for line in err.strip().splitlines() - if expected_runtime_warning not in line - ] - self.assertEqual(filtered_err_lines, []) - else: - self.assertEqual(err, "") + self.assertEqual(self._nogil_filtered_err(err, "embedded_ext"), "") self.assertEqual(out, "\n" "my_test_extension.executed='yes'\n" @@ -264,6 +250,26 @@ def test_create_module_from_initfunc(self): "embedded_ext.executed='yes'\n" ) + def test_inittab_submodule_multiphase(self): + out, err = self.run_embedded_interpreter("test_inittab_submodule_multiphase") + self.assertEqual(err, "") + self.assertEqual(out, + "\n" + "\n" + "Hello from sub-module\n" + "mp_pkg.mp_submod.mp_submod_exec_slot_ran='yes'\n" + "mp_pkg.mp_pkg_exec_slot_ran='yes'\n" + ) + + def test_inittab_submodule_singlephase(self): + out, err = self.run_embedded_interpreter("test_inittab_submodule_singlephase") + self.assertEqual(self._nogil_filtered_err(err, "sp_pkg"), "") + self.assertEqual(out, + "\n" + "\n" + "Hello from sub-module\n" + ) + def test_forced_io_encoding(self): # Checks forced configuration of embedded interpreter IO streams env = dict(os.environ, PYTHONIOENCODING="utf-8:surrogateescape") @@ -541,6 +547,24 @@ def test_getargs_reset_static_parser(self): out, err = self.run_embedded_interpreter("test_repeated_init_exec", code) self.assertEqual(out, '1\n2\n3\n' * INIT_LOOPS) + @staticmethod + def _nogil_filtered_err(err: str, mod_name: str) -> str: + if not support.Py_GIL_DISABLED: + return err + + # the test imports a singlephase init extension, so it emits a warning + # under the free-threaded build + expected_runtime_warning = ( + "RuntimeWarning: The global interpreter lock (GIL)" + f" has been enabled to load module '{mod_name}'" + ) + filtered_err_lines = [ + line + for line in err.strip().splitlines() + if expected_runtime_warning not in line + ] + return "\n".join(filtered_err_lines) + def config_dev_mode(preconfig, config): preconfig['allocator'] = PYMEM_ALLOCATOR_DEBUG diff --git a/Lib/test/test_exception_group.py b/Lib/test/test_exception_group.py index 5df2c41c6b5..ace7ec72917 100644 --- a/Lib/test/test_exception_group.py +++ b/Lib/test/test_exception_group.py @@ -1,4 +1,4 @@ -import collections.abc +import collections import types import unittest from test.support import skip_emscripten_stack_overflow, skip_wasi_stack_overflow, exceeds_recursion_limit @@ -193,6 +193,77 @@ class MyEG(ExceptionGroup): "MyEG('flat', [ValueError(1), TypeError(2)]), " "TypeError(2)])")) + def test_exceptions_mutation(self): + class 
MyEG(ExceptionGroup): + pass + + excs = [ValueError(1), TypeError(2)] + eg = MyEG('test', excs) + + self.assertEqual(repr(eg), "MyEG('test', [ValueError(1), TypeError(2)])") + excs.clear() + + # Ensure that clearing the exceptions sequence doesn't change the repr. + self.assertEqual(repr(eg), "MyEG('test', [ValueError(1), TypeError(2)])") + + # Ensure that the args are still as passed. + self.assertEqual(eg.args, ('test', [])) + + excs = (ValueError(1), KeyboardInterrupt(2)) + eg = BaseExceptionGroup('test', excs) + + # Ensure that immutable sequences still work fine. + self.assertEqual( + repr(eg), + "BaseExceptionGroup('test', (ValueError(1), KeyboardInterrupt(2)))" + ) + + # Test non-standard custom sequences. + excs = collections.deque([ValueError(1), TypeError(2)]) + eg = ExceptionGroup('test', excs) + + self.assertEqual( + repr(eg), + "ExceptionGroup('test', deque([ValueError(1), TypeError(2)]))" + ) + excs.clear() + + # Ensure that clearing the exceptions sequence doesn't change the repr. + self.assertEqual( + repr(eg), + "ExceptionGroup('test', deque([ValueError(1), TypeError(2)]))" + ) + + def test_repr_raises(self): + class MySeq(collections.abc.Sequence): + def __init__(self, raises): + self.raises = raises + + def __len__(self): + return 1 + + def __getitem__(self, index): + if index == 0: + return ValueError(1) + raise IndexError + + def __repr__(self): + if self.raises: + raise self.raises + return None + + seq = MySeq(None) + with self.assertRaisesRegex( + TypeError, + r".*MySeq\.__repr__\(\) must return a str, not NoneType" + ): + ExceptionGroup("test", seq) + + seq = MySeq(ValueError) + with self.assertRaises(ValueError): + BaseExceptionGroup("test", seq) + + def create_simple_eg(): excs = [] diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 7decd8f32d5..a97242483a8 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -1,11 +1,13 @@ import unittest import os import textwrap +import contextlib import importlib import sys import socket import threading import time +from contextlib import contextmanager from asyncio import staggered, taskgroups, base_events, tasks from unittest.mock import ANY from test.support import ( @@ -26,9 +28,12 @@ PROFILING_MODE_ALL = 3 # Thread status flags -THREAD_STATUS_HAS_GIL = (1 << 0) -THREAD_STATUS_ON_CPU = (1 << 1) -THREAD_STATUS_UNKNOWN = (1 << 2) +THREAD_STATUS_HAS_GIL = 1 << 0 +THREAD_STATUS_ON_CPU = 1 << 1 +THREAD_STATUS_UNKNOWN = 1 << 2 + +# Maximum number of retry attempts for operations that may fail transiently +MAX_TRIES = 10 try: from concurrent import interpreters @@ -47,12 +52,149 @@ ) +# ============================================================================ +# Module-level helper functions +# ============================================================================ + + def _make_test_script(script_dir, script_basename, source): to_return = make_script(script_dir, script_basename, source) importlib.invalidate_caches() return to_return +def _create_server_socket(port, backlog=1): + """Create and configure a server socket for test communication.""" + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(backlog) + return server_socket + + +def _wait_for_signal(sock, expected_signals, timeout=SHORT_TIMEOUT): + """ + Wait for expected signal(s) from a 
socket with proper timeout and EOF handling. + + Args: + sock: Connected socket to read from + expected_signals: Single bytes object or list of bytes objects to wait for + timeout: Socket timeout in seconds + + Returns: + bytes: Complete accumulated response buffer + + Raises: + RuntimeError: If connection closed before signal received or timeout + """ + if isinstance(expected_signals, bytes): + expected_signals = [expected_signals] + + sock.settimeout(timeout) + buffer = b"" + + while True: + # Check if all expected signals are in buffer + if all(sig in buffer for sig in expected_signals): + return buffer + + try: + chunk = sock.recv(4096) + if not chunk: + # EOF - connection closed + raise RuntimeError( + f"Connection closed before receiving expected signals. " + f"Expected: {expected_signals}, Got: {buffer[-200:]!r}" + ) + buffer += chunk + except socket.timeout: + raise RuntimeError( + f"Timeout waiting for signals. " + f"Expected: {expected_signals}, Got: {buffer[-200:]!r}" + ) + + +def _wait_for_n_signals(sock, signal_pattern, count, timeout=SHORT_TIMEOUT): + """ + Wait for N occurrences of a signal pattern. + + Args: + sock: Connected socket to read from + signal_pattern: bytes pattern to count (e.g., b"ready") + count: Number of occurrences expected + timeout: Socket timeout in seconds + + Returns: + bytes: Complete accumulated response buffer + + Raises: + RuntimeError: If connection closed or timeout before receiving all signals + """ + sock.settimeout(timeout) + buffer = b"" + found_count = 0 + + while found_count < count: + try: + chunk = sock.recv(4096) + if not chunk: + raise RuntimeError( + f"Connection closed after {found_count}/{count} signals. " + f"Last 200 bytes: {buffer[-200:]!r}" + ) + buffer += chunk + # Count occurrences in entire buffer + found_count = buffer.count(signal_pattern) + except socket.timeout: + raise RuntimeError( + f"Timeout waiting for {count} signals (found {found_count}). " + f"Last 200 bytes: {buffer[-200:]!r}" + ) + + return buffer + + +@contextmanager +def _managed_subprocess(args, timeout=SHORT_TIMEOUT): + """ + Context manager for subprocess lifecycle management. + + Ensures process is properly terminated and cleaned up even on exceptions. + Uses graceful termination first, then forceful kill if needed. 
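+
+    A minimal usage sketch (the child command below is illustrative only,
+    not taken from any test in this file):
+
+        with _managed_subprocess([sys.executable, "-c", "input()"]) as p:
+            ...  # inspect p.pid; termination and reaping happen on exit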
+ """ + p = subprocess.Popen(args) + try: + yield p + finally: + try: + p.terminate() + try: + p.wait(timeout=timeout) + except subprocess.TimeoutExpired: + p.kill() + try: + p.wait(timeout=timeout) + except subprocess.TimeoutExpired: + pass # Process refuses to die, nothing more we can do + except OSError: + pass # Process already dead + + +def _cleanup_sockets(*sockets): + """Safely close multiple sockets, ignoring errors.""" + for sock in sockets: + if sock is not None: + try: + sock.close() + except OSError: + pass + + +# ============================================================================ +# Decorators and skip conditions +# ============================================================================ + skip_if_not_supported = unittest.skipIf( ( sys.platform != "darwin" @@ -65,40 +207,196 @@ def _make_test_script(script_dir, script_basename, source): def requires_subinterpreters(meth): """Decorator to skip a test if subinterpreters are not supported.""" - return unittest.skipIf(interpreters is None, - 'subinterpreters required')(meth) + return unittest.skipIf(interpreters is None, "subinterpreters required")( + meth + ) +# ============================================================================ +# Simple wrapper functions for RemoteUnwinder +# ============================================================================ + def get_stack_trace(pid): - unwinder = RemoteUnwinder(pid, all_threads=True, debug=True) - return unwinder.get_stack_trace() + for _ in busy_retry(SHORT_TIMEOUT): + try: + unwinder = RemoteUnwinder(pid, all_threads=True, debug=True) + return unwinder.get_stack_trace() + except RuntimeError as e: + continue + raise RuntimeError("Failed to get stack trace after retries") def get_async_stack_trace(pid): - unwinder = RemoteUnwinder(pid, debug=True) - return unwinder.get_async_stack_trace() + for _ in busy_retry(SHORT_TIMEOUT): + try: + unwinder = RemoteUnwinder(pid, debug=True) + return unwinder.get_async_stack_trace() + except RuntimeError as e: + continue + raise RuntimeError("Failed to get async stack trace after retries") def get_all_awaited_by(pid): - unwinder = RemoteUnwinder(pid, debug=True) - return unwinder.get_all_awaited_by() + for _ in busy_retry(SHORT_TIMEOUT): + try: + unwinder = RemoteUnwinder(pid, debug=True) + return unwinder.get_all_awaited_by() + except RuntimeError as e: + continue + raise RuntimeError("Failed to get all awaited_by after retries") -class TestGetStackTrace(unittest.TestCase): +# ============================================================================ +# Base test class with shared infrastructure +# ============================================================================ + + +class RemoteInspectionTestBase(unittest.TestCase): + """Base class for remote inspection tests with common helpers.""" + maxDiff = None + def _run_script_and_get_trace( + self, + script, + trace_func, + wait_for_signals=None, + port=None, + backlog=1, + ): + """ + Common pattern: run a script, wait for signals, get trace. 
+ + Args: + script: Script content (will be formatted with port if {port} present) + trace_func: Function to call with pid to get trace (e.g., get_stack_trace) + wait_for_signals: Signal(s) to wait for before getting trace + port: Port to use (auto-selected if None) + backlog: Socket listen backlog + + Returns: + tuple: (trace_result, script_name) + """ + if port is None: + port = find_unused_port() + + # Format script with port if needed + if "{port}" in script or "{{port}}" in script: + script = script.replace("{{port}}", "{port}").format(port=port) + + with os_helper.temp_dir() as work_dir: + script_dir = os.path.join(work_dir, "script_pkg") + os.mkdir(script_dir) + + server_socket = _create_server_socket(port, backlog) + script_name = _make_test_script(script_dir, "script", script) + client_socket = None + + try: + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None + + if wait_for_signals: + _wait_for_signal(client_socket, wait_for_signals) + + try: + trace = trace_func(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + return trace, script_name + finally: + _cleanup_sockets(client_socket, server_socket) + + def _find_frame_in_trace(self, stack_trace, predicate): + """ + Find a frame matching predicate in stack trace. + + Args: + stack_trace: List of InterpreterInfo objects + predicate: Function(frame) -> bool + + Returns: + FrameInfo or None + """ + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + for frame in thread_info.frame_info: + if predicate(frame): + return frame + return None + + def _find_thread_by_id(self, stack_trace, thread_id): + """Find a thread by its native thread ID.""" + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + if thread_info.thread_id == thread_id: + return thread_info + return None + + def _find_thread_with_frame(self, stack_trace, frame_predicate): + """Find a thread containing a frame matching predicate.""" + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + for frame in thread_info.frame_info: + if frame_predicate(frame): + return thread_info + return None + + def _get_thread_statuses(self, stack_trace): + """Extract thread_id -> status mapping from stack trace.""" + statuses = {} + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + statuses[thread_info.thread_id] = thread_info.status + return statuses + + def _get_task_id_map(self, stack_trace): + """Create task_id -> task mapping from async stack trace.""" + return {task.task_id: task for task in stack_trace[0].awaited_by} + + def _get_awaited_by_relationships(self, stack_trace): + """Extract task name to awaited_by set mapping.""" + id_to_task = self._get_task_id_map(stack_trace) + return { + task.task_name: set( + id_to_task[awaited.task_name].task_name + for awaited in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + + def _extract_coroutine_stacks(self, stack_trace): + """Extract and format coroutine stacks from tasks.""" + return { + task.task_name: sorted( + tuple(tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + for task in stack_trace[0].awaited_by + } + + +# ============================================================================ +# Test classes +# ============================================================================ + + +class 
TestGetStackTrace(RemoteInspectionTestBase): @skip_if_not_supported @unittest.skipIf( sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support", ) def test_remote_stack_trace(self): - # Spawn a process with some realistic Python code port = find_unused_port() script = textwrap.dedent( f"""\ import time, sys, socket, threading - # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) @@ -111,80 +409,78 @@ def baz(): foo() def foo(): - sock.sendall(b"ready:thread\\n"); time.sleep(10_000) # same line number + sock.sendall(b"ready:thread\\n"); time.sleep(10_000) t = threading.Thread(target=bar) t.start() - sock.sendall(b"ready:main\\n"); t.join() # same line number + sock.sendall(b"ready:main\\n"); t.join() """ ) - stack_trace = None + with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) - + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = b"" - while ( - b"ready:main" not in response - or b"ready:thread" not in response - ): - response += client_socket.recv(1024) - stack_trace = get_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None + + _wait_for_signal( + client_socket, [b"ready:main", b"ready:thread"] + ) + + try: + stack_trace = get_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + thread_expected_stack_trace = [ + FrameInfo([script_name, 15, "foo"]), + FrameInfo([script_name, 12, "baz"]), + FrameInfo([script_name, 9, "bar"]), + FrameInfo([threading.__file__, ANY, "Thread.run"]), + FrameInfo( + [ + threading.__file__, + ANY, + "Thread._bootstrap_inner", + ] + ), + FrameInfo( + [threading.__file__, ANY, "Thread._bootstrap"] + ), + ] + + # Find expected thread stack + found_thread = self._find_thread_with_frame( + stack_trace, + lambda f: f.funcname == "foo" and f.lineno == 15, + ) + self.assertIsNotNone( + found_thread, "Expected thread stack trace not found" + ) + self.assertEqual( + found_thread.frame_info, thread_expected_stack_trace + ) + + # Check main thread + main_frame = FrameInfo([script_name, 19, ""]) + found_main = self._find_frame_in_trace( + stack_trace, lambda f: f == main_frame + ) + self.assertIsNotNone( + found_main, "Main thread stack trace not found" + ) finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) - - thread_expected_stack_trace = [ - FrameInfo([script_name, 15, "foo"]), - FrameInfo([script_name, 12, "baz"]), - FrameInfo([script_name, 9, "bar"]), - FrameInfo([threading.__file__, ANY, "Thread.run"]), - FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]), - FrameInfo([threading.__file__, 
ANY, "Thread._bootstrap"]), - ] - # Is possible that there are more threads, so we check that the - # expected stack traces are in the result (looking at you Windows!) - found_expected_stack = False - for interpreter_info in stack_trace: - for thread_info in interpreter_info.threads: - if thread_info.frame_info == thread_expected_stack_trace: - found_expected_stack = True - break - if found_expected_stack: - break - self.assertTrue(found_expected_stack, "Expected thread stack trace not found") - - # Check that the main thread stack trace is in the result - frame = FrameInfo([script_name, 19, ""]) - main_thread_found = False - for interpreter_info in stack_trace: - for thread_info in interpreter_info.threads: - if frame in thread_info.frame_info: - main_thread_found = True - break - if main_thread_found: - break - self.assertTrue(main_thread_found, "Main thread stack trace not found in result") + _cleanup_sockets(client_socket, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -192,7 +488,6 @@ def foo(): "Test only runs on Linux with process_vm_readv support", ) def test_async_remote_stack_trace(self): - # Spawn a process with some realistic Python code port = find_unused_port() script = textwrap.dedent( f"""\ @@ -200,12 +495,12 @@ def test_async_remote_stack_trace(self): import time import sys import socket - # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) def c5(): - sock.sendall(b"ready"); time.sleep(10_000) # same line number + sock.sendall(b"ready"); time.sleep(10_000) async def c4(): await asyncio.sleep(0) @@ -236,7 +531,7 @@ def new_eager_loop(): asyncio.run(main(), loop_factory={{TASK_FACTORY}}) """ ) - stack_trace = None + for task_factory_variant in "asyncio.new_event_loop", "new_eager_loop": with ( self.subTest(task_factory_variant=task_factory_variant), @@ -244,195 +539,203 @@ def new_eager_loop(): ): script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - server_socket = socket.socket( - socket.AF_INET, socket.SOCK_STREAM - ) - server_socket.setsockopt( - socket.SOL_SOCKET, socket.SO_REUSEADDR, 1 - ) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) + + server_socket = _create_server_socket(port) script_name = _make_test_script( script_dir, "script", script.format(TASK_FACTORY=task_factory_variant), ) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = client_socket.recv(1024) - self.assertEqual(response, b"ready") - stack_trace = get_async_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) + with _managed_subprocess( + [sys.executable, script_name] + ) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None - # First check all the tasks are present - tasks_names = [ - task.task_name for task in stack_trace[0].awaited_by - ] - for task_name in ["c2_root", "sub_main_1", "sub_main_2"]: - self.assertIn(task_name, tasks_names) + response = _wait_for_signal(client_socket, b"ready") + self.assertIn(b"ready", response) - # Now ensure that the awaited_by_relationships are correct - id_to_task = { - task.task_id: task for task in stack_trace[0].awaited_by - } - task_name_to_awaited_by = 
{ - task.task_name: set( - id_to_task[awaited.task_name].task_name - for awaited in task.awaited_by - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - task_name_to_awaited_by, - { - "c2_root": {"Task-1", "sub_main_1", "sub_main_2"}, - "Task-1": set(), - "sub_main_1": {"Task-1"}, - "sub_main_2": {"Task-1"}, - }, - ) - - # Now ensure that the coroutine stacks are correct - coroutine_stacks = { - task.task_name: sorted( - tuple(tuple(frame) for frame in coro.call_stack) - for coro in task.coroutine_stack - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - coroutine_stacks, - { - "Task-1": [ - ( - tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] - ), - tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ] - ), - tuple([script_name, 26, "main"]), + try: + stack_trace = get_async_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" ) - ], - "c2_root": [ - ( - tuple([script_name, 10, "c5"]), - tuple([script_name, 14, "c4"]), - tuple([script_name, 17, "c3"]), - tuple([script_name, 20, "c2"]), - ) - ], - "sub_main_1": [(tuple([script_name, 23, "c1"]),)], - "sub_main_2": [(tuple([script_name, 23, "c1"]),)], - }, - ) - # Now ensure the coroutine stacks for the awaited_by relationships are correct. - awaited_by_coroutine_stacks = { - task.task_name: sorted( - ( - id_to_task[coro.task_name].task_name, - tuple(tuple(frame) for frame in coro.call_stack), + # Check all tasks are present + tasks_names = [ + task.task_name + for task in stack_trace[0].awaited_by + ] + for task_name in [ + "c2_root", + "sub_main_1", + "sub_main_2", + ]: + self.assertIn(task_name, tasks_names) + + # Check awaited_by relationships + relationships = self._get_awaited_by_relationships( + stack_trace ) - for coro in task.awaited_by - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - awaited_by_coroutine_stacks, - { - "Task-1": [], - "c2_root": [ - ( - "Task-1", + self.assertEqual( + relationships, + { + "c2_root": { + "Task-1", + "sub_main_1", + "sub_main_2", + }, + "Task-1": set(), + "sub_main_1": {"Task-1"}, + "sub_main_2": {"Task-1"}, + }, + ) + + # Check coroutine stacks + coroutine_stacks = self._extract_coroutine_stacks( + stack_trace + ) + self.assertEqual( + coroutine_stacks, + { + "Task-1": [ + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ) + ], + "c2_root": [ + ( + tuple([script_name, 10, "c5"]), + tuple([script_name, 14, "c4"]), + tuple([script_name, 17, "c3"]), + tuple([script_name, 20, "c2"]), + ) + ], + "sub_main_1": [ + (tuple([script_name, 23, "c1"]),) + ], + "sub_main_2": [ + (tuple([script_name, 23, "c1"]),) + ], + }, + ) + + # Check awaited_by coroutine stacks + id_to_task = self._get_task_id_map(stack_trace) + awaited_by_coroutine_stacks = { + task.task_name: sorted( ( + id_to_task[coro.task_name].task_name, tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] + tuple(frame) + for frame in coro.call_stack ), - tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ] - ), - tuple([script_name, 26, "main"]), - ), - ), - ("sub_main_1", (tuple([script_name, 23, "c1"]),)), - ("sub_main_2", (tuple([script_name, 23, "c1"]),)), - ], - "sub_main_1": [ - ( - "Task-1", - ( - tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] - ), - tuple( - [ - taskgroups.__file__, - ANY, - 
"TaskGroup.__aexit__", - ] - ), - tuple([script_name, 26, "main"]), - ), + ) + for coro in task.awaited_by ) - ], - "sub_main_2": [ - ( - "Task-1", - ( - tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] + for task in stack_trace[0].awaited_by + } + self.assertEqual( + awaited_by_coroutine_stacks, + { + "Task-1": [], + "c2_root": [ + ( + "Task-1", + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ), ), - tuple( - [ - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ] + ( + "sub_main_1", + (tuple([script_name, 23, "c1"]),), ), - tuple([script_name, 26, "main"]), - ), - ) - ], - }, - ) + ( + "sub_main_2", + (tuple([script_name, 23, "c1"]),), + ), + ], + "sub_main_1": [ + ( + "Task-1", + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ), + ) + ], + "sub_main_2": [ + ( + "Task-1", + ( + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + tuple( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + tuple([script_name, 26, "main"]), + ), + ) + ], + }, + ) + finally: + _cleanup_sockets(client_socket, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -440,7 +743,6 @@ def new_eager_loop(): "Test only runs on Linux with process_vm_readv support", ) def test_asyncgen_remote_stack_trace(self): - # Spawn a process with some realistic Python code port = find_unused_port() script = textwrap.dedent( f"""\ @@ -448,12 +750,12 @@ def test_asyncgen_remote_stack_trace(self): import time import sys import socket - # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) async def gen_nested_call(): - sock.sendall(b"ready"); time.sleep(10_000) # same line number + sock.sendall(b"ready"); time.sleep(10_000) async def gen(): for num in range(2): @@ -468,59 +770,56 @@ async def main(): asyncio.run(main()) """ ) - stack_trace = None + with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) + + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = client_socket.recv(1024) - self.assertEqual(response, b"ready") - stack_trace = get_async_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None - # For this simple asyncgen test, we only expect one task with the full coroutine stack - self.assertEqual(len(stack_trace[0].awaited_by), 1) - task = stack_trace[0].awaited_by[0] - 
self.assertEqual(task.task_name, "Task-1") + response = _wait_for_signal(client_socket, b"ready") + self.assertIn(b"ready", response) - # Check the coroutine stack - based on actual output, only shows main - coroutine_stack = sorted( - tuple(tuple(frame) for frame in coro.call_stack) - for coro in task.coroutine_stack - ) - self.assertEqual( - coroutine_stack, - [ - ( - tuple([script_name, 10, "gen_nested_call"]), - tuple([script_name, 16, "gen"]), - tuple([script_name, 19, "main"]), + try: + stack_trace = get_async_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + # For this simple asyncgen test, we only expect one task + self.assertEqual(len(stack_trace[0].awaited_by), 1) + task = stack_trace[0].awaited_by[0] + self.assertEqual(task.task_name, "Task-1") + + # Check the coroutine stack + coroutine_stack = sorted( + tuple(tuple(frame) for frame in coro.call_stack) + for coro in task.coroutine_stack + ) + self.assertEqual( + coroutine_stack, + [ + ( + tuple([script_name, 10, "gen_nested_call"]), + tuple([script_name, 16, "gen"]), + tuple([script_name, 19, "main"]), + ) + ], ) - ], - ) - # No awaited_by relationships expected for this simple case - self.assertEqual(task.awaited_by, []) + # No awaited_by relationships expected + self.assertEqual(task.awaited_by, []) + finally: + _cleanup_sockets(client_socket, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -528,7 +827,6 @@ async def main(): "Test only runs on Linux with process_vm_readv support", ) def test_async_gather_remote_stack_trace(self): - # Spawn a process with some realistic Python code port = find_unused_port() script = textwrap.dedent( f"""\ @@ -536,13 +834,13 @@ def test_async_gather_remote_stack_trace(self): import time import sys import socket - # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) async def deep(): await asyncio.sleep(0) - sock.sendall(b"ready"); time.sleep(10_000) # same line number + sock.sendall(b"ready"); time.sleep(10_000) async def c1(): await asyncio.sleep(0) @@ -557,103 +855,92 @@ async def main(): asyncio.run(main()) """ ) - stack_trace = None + with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) + + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = client_socket.recv(1024) - self.assertEqual(response, b"ready") - stack_trace = get_async_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None - # First check all the tasks are present - tasks_names = [ - task.task_name for task in stack_trace[0].awaited_by - ] - for task_name in 
["Task-1", "Task-2"]: - self.assertIn(task_name, tasks_names) + response = _wait_for_signal(client_socket, b"ready") + self.assertIn(b"ready", response) - # Now ensure that the awaited_by_relationships are correct - id_to_task = { - task.task_id: task for task in stack_trace[0].awaited_by - } - task_name_to_awaited_by = { - task.task_name: set( - id_to_task[awaited.task_name].task_name - for awaited in task.awaited_by - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - task_name_to_awaited_by, - { - "Task-1": set(), - "Task-2": {"Task-1"}, - }, - ) - - # Now ensure that the coroutine stacks are correct - coroutine_stacks = { - task.task_name: sorted( - tuple(tuple(frame) for frame in coro.call_stack) - for coro in task.coroutine_stack - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - coroutine_stacks, - { - "Task-1": [(tuple([script_name, 21, "main"]),)], - "Task-2": [ - ( - tuple([script_name, 11, "deep"]), - tuple([script_name, 15, "c1"]), + try: + stack_trace = get_async_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" ) - ], - }, - ) - # Now ensure the coroutine stacks for the awaited_by relationships are correct. - awaited_by_coroutine_stacks = { - task.task_name: sorted( - ( - id_to_task[coro.task_name].task_name, - tuple(tuple(frame) for frame in coro.call_stack), + # Check all tasks are present + tasks_names = [ + task.task_name for task in stack_trace[0].awaited_by + ] + for task_name in ["Task-1", "Task-2"]: + self.assertIn(task_name, tasks_names) + + # Check awaited_by relationships + relationships = self._get_awaited_by_relationships( + stack_trace ) - for coro in task.awaited_by - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - awaited_by_coroutine_stacks, - { - "Task-1": [], - "Task-2": [ - ("Task-1", (tuple([script_name, 21, "main"]),)) - ], - }, - ) + self.assertEqual( + relationships, + { + "Task-1": set(), + "Task-2": {"Task-1"}, + }, + ) + + # Check coroutine stacks + coroutine_stacks = self._extract_coroutine_stacks( + stack_trace + ) + self.assertEqual( + coroutine_stacks, + { + "Task-1": [(tuple([script_name, 21, "main"]),)], + "Task-2": [ + ( + tuple([script_name, 11, "deep"]), + tuple([script_name, 15, "c1"]), + ) + ], + }, + ) + + # Check awaited_by coroutine stacks + id_to_task = self._get_task_id_map(stack_trace) + awaited_by_coroutine_stacks = { + task.task_name: sorted( + ( + id_to_task[coro.task_name].task_name, + tuple( + tuple(frame) for frame in coro.call_stack + ), + ) + for coro in task.awaited_by + ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + awaited_by_coroutine_stacks, + { + "Task-1": [], + "Task-2": [ + ("Task-1", (tuple([script_name, 21, "main"]),)) + ], + }, + ) + finally: + _cleanup_sockets(client_socket, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -661,7 +948,6 @@ async def main(): "Test only runs on Linux with process_vm_readv support", ) def test_async_staggered_race_remote_stack_trace(self): - # Spawn a process with some realistic Python code port = find_unused_port() script = textwrap.dedent( f"""\ @@ -669,13 +955,13 @@ def test_async_staggered_race_remote_stack_trace(self): import time import sys import socket - # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) async def deep(): await asyncio.sleep(0) - sock.sendall(b"ready"); time.sleep(10_000) # same line number + sock.sendall(b"ready"); time.sleep(10_000) 
async def c1(): await asyncio.sleep(0) @@ -693,123 +979,122 @@ async def main(): asyncio.run(main()) """ ) - stack_trace = None + with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) + + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = client_socket.recv(1024) - self.assertEqual(response, b"ready") - stack_trace = get_async_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None - # First check all the tasks are present - tasks_names = [ - task.task_name for task in stack_trace[0].awaited_by - ] - for task_name in ["Task-1", "Task-2"]: - self.assertIn(task_name, tasks_names) + response = _wait_for_signal(client_socket, b"ready") + self.assertIn(b"ready", response) - # Now ensure that the awaited_by_relationships are correct - id_to_task = { - task.task_id: task for task in stack_trace[0].awaited_by - } - task_name_to_awaited_by = { - task.task_name: set( - id_to_task[awaited.task_name].task_name - for awaited in task.awaited_by - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - task_name_to_awaited_by, - { - "Task-1": set(), - "Task-2": {"Task-1"}, - }, - ) - - # Now ensure that the coroutine stacks are correct - coroutine_stacks = { - task.task_name: sorted( - tuple(tuple(frame) for frame in coro.call_stack) - for coro in task.coroutine_stack - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - coroutine_stacks, - { - "Task-1": [ - ( - tuple([staggered.__file__, ANY, "staggered_race"]), - tuple([script_name, 21, "main"]), + try: + stack_trace = get_async_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" ) - ], - "Task-2": [ - ( - tuple([script_name, 11, "deep"]), - tuple([script_name, 15, "c1"]), - tuple( - [ - staggered.__file__, - ANY, - "staggered_race..run_one_coro", - ] - ), - ) - ], - }, - ) - # Now ensure the coroutine stacks for the awaited_by relationships are correct. 
- awaited_by_coroutine_stacks = { - task.task_name: sorted( - ( - id_to_task[coro.task_name].task_name, - tuple(tuple(frame) for frame in coro.call_stack), + # Check all tasks are present + tasks_names = [ + task.task_name for task in stack_trace[0].awaited_by + ] + for task_name in ["Task-1", "Task-2"]: + self.assertIn(task_name, tasks_names) + + # Check awaited_by relationships + relationships = self._get_awaited_by_relationships( + stack_trace ) - for coro in task.awaited_by - ) - for task in stack_trace[0].awaited_by - } - self.assertEqual( - awaited_by_coroutine_stacks, - { - "Task-1": [], - "Task-2": [ - ( - "Task-1", + self.assertEqual( + relationships, + { + "Task-1": set(), + "Task-2": {"Task-1"}, + }, + ) + + # Check coroutine stacks + coroutine_stacks = self._extract_coroutine_stacks( + stack_trace + ) + self.assertEqual( + coroutine_stacks, + { + "Task-1": [ + ( + tuple( + [ + staggered.__file__, + ANY, + "staggered_race", + ] + ), + tuple([script_name, 21, "main"]), + ) + ], + "Task-2": [ + ( + tuple([script_name, 11, "deep"]), + tuple([script_name, 15, "c1"]), + tuple( + [ + staggered.__file__, + ANY, + "staggered_race..run_one_coro", + ] + ), + ) + ], + }, + ) + + # Check awaited_by coroutine stacks + id_to_task = self._get_task_id_map(stack_trace) + awaited_by_coroutine_stacks = { + task.task_name: sorted( ( + id_to_task[coro.task_name].task_name, tuple( - [staggered.__file__, ANY, "staggered_race"] + tuple(frame) for frame in coro.call_stack ), - tuple([script_name, 21, "main"]), - ), + ) + for coro in task.awaited_by ) - ], - }, - ) + for task in stack_trace[0].awaited_by + } + self.assertEqual( + awaited_by_coroutine_stacks, + { + "Task-1": [], + "Task-2": [ + ( + "Task-1", + ( + tuple( + [ + staggered.__file__, + ANY, + "staggered_race", + ] + ), + tuple([script_name, 21, "main"]), + ), + ) + ], + }, + ) + finally: + _cleanup_sockets(client_socket, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -817,6 +1102,10 @@ async def main(): "Test only runs on Linux with process_vm_readv support", ) def test_async_global_awaited_by(self): + # Reduced from 1000 to 100 to avoid file descriptor exhaustion + # when running tests in parallel (e.g., -j 20) + NUM_TASKS = 100 + port = find_unused_port() script = textwrap.dedent( f"""\ @@ -832,7 +1121,6 @@ def test_async_global_awaited_by(self): PORT = socket_helper.find_unused_port() connections = 0 - # Connect to the test process sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) @@ -855,23 +1143,16 @@ async def echo_client(message): assert message == data.decode() writer.close() await writer.wait_closed() - # Signal we are ready to sleep sock.sendall(b"ready") await asyncio.sleep(SHORT_TIMEOUT) async def echo_client_spam(server): async with asyncio.TaskGroup() as tg: - while connections < 1000: + while connections < {NUM_TASKS}: msg = list(ascii_lowercase + digits) random.shuffle(msg) tg.create_task(echo_client("".join(msg))) await asyncio.sleep(0) - # at least a 1000 tasks created. 
Each task will signal - # when is ready to avoid the race caused by the fact that - # tasks are waited on tg.__exit__ and we cannot signal when - # that happens otherwise - # at this point all client tasks completed without assertion errors - # let's wrap up the test server.close() await server.wait_closed() @@ -886,231 +1167,216 @@ async def main(): asyncio.run(main()) """ ) - stack_trace = None + with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) + + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - for _ in range(1000): - expected_response = b"ready" - response = client_socket.recv(len(expected_response)) - self.assertEqual(response, expected_response) - for _ in busy_retry(SHORT_TIMEOUT): + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None + + # Wait for NUM_TASKS "ready" signals + try: + _wait_for_n_signals(client_socket, b"ready", NUM_TASKS) + except RuntimeError as e: + self.fail(str(e)) + try: all_awaited_by = get_all_awaited_by(p.pid) - except RuntimeError as re: - # This call reads a linked list in another process with - # no synchronization. That occasionally leads to invalid - # reads. Here we avoid making the test flaky. 
- msg = str(re) - if msg.startswith("Task list appears corrupted"): - continue - elif msg.startswith( - "Invalid linked list structure reading remote memory" - ): - continue - elif msg.startswith("Unknown error reading memory"): - continue - elif msg.startswith("Unhandled frame owner"): - continue - raise # Unrecognized exception, safest not to ignore it - else: - break - # expected: a list of two elements: 1 thread, 1 interp - self.assertEqual(len(all_awaited_by), 2) - # expected: a tuple with the thread ID and the awaited_by list - self.assertEqual(len(all_awaited_by[0]), 2) - # expected: no tasks in the fallback per-interp task list - self.assertEqual(all_awaited_by[1], (0, [])) - entries = all_awaited_by[0][1] - # expected: at least 1000 pending tasks - self.assertGreaterEqual(len(entries), 1000) - # the first three tasks stem from the code structure - main_stack = [ - FrameInfo([taskgroups.__file__, ANY, "TaskGroup._aexit"]), - FrameInfo( - [taskgroups.__file__, ANY, "TaskGroup.__aexit__"] - ), - FrameInfo([script_name, 60, "main"]), - ] - self.assertIn( - TaskInfo( - [ANY, "Task-1", [CoroInfo([main_stack, ANY])], []] - ), - entries, - ) - self.assertIn( - TaskInfo( - [ - ANY, - "server task", - [ - CoroInfo( - [ - [ - FrameInfo( - [ - base_events.__file__, - ANY, - "Server.serve_forever", - ] - ) - ], - ANY, - ] - ) - ], - [ - CoroInfo( - [ - [ - FrameInfo( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] - ), - FrameInfo( - [ - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ] - ), - FrameInfo( - [script_name, ANY, "main"] - ), - ], - ANY, - ] - ) - ], - ] - ), - entries, - ) - self.assertIn( - TaskInfo( - [ - ANY, - "Task-4", - [ - CoroInfo( - [ - [ - FrameInfo( - [tasks.__file__, ANY, "sleep"] - ), - FrameInfo( - [ - script_name, - 38, - "echo_client", - ] - ), - ], - ANY, - ] - ) - ], - [ - CoroInfo( - [ - [ - FrameInfo( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] - ), - FrameInfo( - [ - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ] - ), - FrameInfo( - [ - script_name, - 41, - "echo_client_spam", - ] - ), - ], - ANY, - ] - ) - ], - ] - ), - entries, - ) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) - expected_awaited_by = [ - CoroInfo( - [ - [ - FrameInfo( - [ - taskgroups.__file__, - ANY, - "TaskGroup._aexit", - ] - ), - FrameInfo( - [ - taskgroups.__file__, - ANY, - "TaskGroup.__aexit__", - ] - ), - FrameInfo( - [script_name, 41, "echo_client_spam"] - ), - ], - ANY, - ] - ) - ] - tasks_with_awaited = [ - task - for task in entries - if task.awaited_by == expected_awaited_by - ] - self.assertGreaterEqual(len(tasks_with_awaited), 1000) + # Expected: a list of two elements: 1 thread, 1 interp + self.assertEqual(len(all_awaited_by), 2) + # Expected: a tuple with the thread ID and the awaited_by list + self.assertEqual(len(all_awaited_by[0]), 2) + # Expected: no tasks in the fallback per-interp task list + self.assertEqual(all_awaited_by[1], (0, [])) - # the final task will have some random number, but it should for - # sure be one of the echo client spam horde (In windows this is not true - # for some reason) - if sys.platform != "win32": - self.assertEqual( - tasks_with_awaited[-1].awaited_by, - entries[-1].awaited_by, + entries = all_awaited_by[0][1] + # Expected: at least NUM_TASKS pending tasks + self.assertGreaterEqual(len(entries), NUM_TASKS) + + # Check the main task structure + main_stack = [ + FrameInfo( + [taskgroups.__file__, ANY, "TaskGroup._aexit"] + ), + FrameInfo( + 
[taskgroups.__file__, ANY, "TaskGroup.__aexit__"] + ), + FrameInfo([script_name, 52, "main"]), + ] + self.assertIn( + TaskInfo( + [ANY, "Task-1", [CoroInfo([main_stack, ANY])], []] + ), + entries, ) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) + self.assertIn( + TaskInfo( + [ + ANY, + "server task", + [ + CoroInfo( + [ + [ + FrameInfo( + [ + base_events.__file__, + ANY, + "Server.serve_forever", + ] + ) + ], + ANY, + ] + ) + ], + [ + CoroInfo( + [ + [ + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + FrameInfo( + [script_name, ANY, "main"] + ), + ], + ANY, + ] + ) + ], + ] + ), + entries, + ) + self.assertIn( + TaskInfo( + [ + ANY, + "Task-4", + [ + CoroInfo( + [ + [ + FrameInfo( + [ + tasks.__file__, + ANY, + "sleep", + ] + ), + FrameInfo( + [ + script_name, + 36, + "echo_client", + ] + ), + ], + ANY, + ] + ) + ], + [ + CoroInfo( + [ + [ + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + FrameInfo( + [ + script_name, + 39, + "echo_client_spam", + ] + ), + ], + ANY, + ] + ) + ], + ] + ), + entries, + ) + + expected_awaited_by = [ + CoroInfo( + [ + [ + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup._aexit", + ] + ), + FrameInfo( + [ + taskgroups.__file__, + ANY, + "TaskGroup.__aexit__", + ] + ), + FrameInfo( + [script_name, 39, "echo_client_spam"] + ), + ], + ANY, + ] + ) + ] + tasks_with_awaited = [ + task + for task in entries + if task.awaited_by == expected_awaited_by + ] + self.assertGreaterEqual(len(tasks_with_awaited), NUM_TASKS) + + # Final task should be from echo client spam (not on Windows) + if sys.platform != "win32": + self.assertEqual( + tasks_with_awaited[-1].awaited_by, + entries[-1].awaited_by, + ) finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) + _cleanup_sockets(client_socket, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -1119,25 +1385,24 @@ async def main(): ) def test_self_trace(self): stack_trace = get_stack_trace(os.getpid()) - # Is possible that there are more threads, so we check that the - # expected stack traces are in the result (looking at you Windows!) 
- this_tread_stack = None - # New format: [InterpreterInfo(interpreter_id, [ThreadInfo(...)])] + + this_thread_stack = None for interpreter_info in stack_trace: for thread_info in interpreter_info.threads: if thread_info.thread_id == threading.get_native_id(): - this_tread_stack = thread_info.frame_info + this_thread_stack = thread_info.frame_info break - if this_tread_stack: + if this_thread_stack: break - self.assertIsNotNone(this_tread_stack) + + self.assertIsNotNone(this_thread_stack) self.assertEqual( - this_tread_stack[:2], + this_thread_stack[:2], [ FrameInfo( [ __file__, - get_stack_trace.__code__.co_firstlineno + 2, + get_stack_trace.__code__.co_firstlineno + 4, "get_stack_trace", ] ), @@ -1158,12 +1423,11 @@ def test_self_trace(self): ) @requires_subinterpreters def test_subinterpreter_stack_trace(self): - # Test that subinterpreters are correctly handled port = find_unused_port() - # Calculate subinterpreter code separately and pickle it to avoid f-string issues import pickle - subinterp_code = textwrap.dedent(f''' + + subinterp_code = textwrap.dedent(f""" import socket import time @@ -1176,9 +1440,8 @@ def nested_func(): nested_func() sub_worker() - ''').strip() + """).strip() - # Pickle the subinterpreter code pickled_code = pickle.dumps(subinterp_code) script = textwrap.dedent( @@ -1189,33 +1452,26 @@ def nested_func(): import socket import threading - # Connect to the test process sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) def main_worker(): - # Function running in main interpreter sock.sendall(b"ready:main\\n") time.sleep(10_000) def run_subinterp(): - # Create and run subinterpreter subinterp = interpreters.create() - import pickle pickled_code = {pickled_code!r} subinterp_code = pickle.loads(pickled_code) subinterp.exec(subinterp_code) - # Start subinterpreter in thread sub_thread = threading.Thread(target=run_subinterp) sub_thread.start() - # Start main thread work main_thread = threading.Thread(target=main_worker) main_thread.start() - # Keep main thread alive main_thread.join() sub_thread.join() """ @@ -1225,85 +1481,74 @@ def run_subinterp(): script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) - + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_sockets = [] + try: - p = subprocess.Popen([sys.executable, script_name]) - - # Accept connections from both main and subinterpreter - responses = set() - while len(responses) < 2: # Wait for both "ready:main" and "ready:sub" - try: - client_socket, _ = server_socket.accept() - client_sockets.append(client_socket) - - # Read the response from this connection - response = client_socket.recv(1024) - if b"ready:main" in response: - responses.add("main") - if b"ready:sub" in response: - responses.add("sub") - except socket.timeout: - break - - server_socket.close() - stack_trace = get_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) - finally: - for client_socket in client_sockets: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) - - # Verify we have multiple 
interpreters - self.assertGreaterEqual(len(stack_trace), 1, "Should have at least one interpreter") - - # Look for main interpreter (ID 0) and subinterpreter (ID > 0) - main_interp = None - sub_interp = None - - for interpreter_info in stack_trace: - if interpreter_info.interpreter_id == 0: - main_interp = interpreter_info - elif interpreter_info.interpreter_id > 0: - sub_interp = interpreter_info - - self.assertIsNotNone(main_interp, "Main interpreter should be present") - - # Check main interpreter has expected stack trace - main_found = False - for thread_info in main_interp.threads: - for frame in thread_info.frame_info: - if frame.funcname == "main_worker": - main_found = True - break - if main_found: - break - self.assertTrue(main_found, "Main interpreter should have main_worker in stack") - - # If subinterpreter is present, check its stack trace - if sub_interp: - sub_found = False - for thread_info in sub_interp.threads: - for frame in thread_info.frame_info: - if frame.funcname in ("sub_worker", "nested_func"): - sub_found = True + with _managed_subprocess([sys.executable, script_name]) as p: + # Accept connections from both main and subinterpreter + responses = set() + while len(responses) < 2: + try: + client_socket, _ = server_socket.accept() + client_sockets.append(client_socket) + response = client_socket.recv(1024) + if b"ready:main" in response: + responses.add("main") + if b"ready:sub" in response: + responses.add("sub") + except socket.timeout: break - if sub_found: - break - self.assertTrue(sub_found, "Subinterpreter should have sub_worker or nested_func in stack") + + server_socket.close() + server_socket = None + + try: + stack_trace = get_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + # Verify we have at least one interpreter + self.assertGreaterEqual(len(stack_trace), 1) + + # Look for main interpreter (ID 0) and subinterpreter (ID > 0) + main_interp = None + sub_interp = None + for interpreter_info in stack_trace: + if interpreter_info.interpreter_id == 0: + main_interp = interpreter_info + elif interpreter_info.interpreter_id > 0: + sub_interp = interpreter_info + + self.assertIsNotNone( + main_interp, "Main interpreter should be present" + ) + + # Check main interpreter has expected stack trace + main_found = self._find_frame_in_trace( + [main_interp], lambda f: f.funcname == "main_worker" + ) + self.assertIsNotNone( + main_found, + "Main interpreter should have main_worker in stack", + ) + + # If subinterpreter is present, check its stack trace + if sub_interp: + sub_found = self._find_frame_in_trace( + [sub_interp], + lambda f: f.funcname + in ("sub_worker", "nested_func"), + ) + self.assertIsNotNone( + sub_found, + "Subinterpreter should have sub_worker or nested_func in stack", + ) + finally: + _cleanup_sockets(*client_sockets, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -1312,14 +1557,11 @@ def run_subinterp(): ) @requires_subinterpreters def test_multiple_subinterpreters_with_threads(self): - # Test multiple subinterpreters, each with multiple threads port = find_unused_port() - # Calculate subinterpreter codes separately and pickle them import pickle - # Code for first subinterpreter with 2 threads - subinterp1_code = textwrap.dedent(f''' + subinterp1_code = textwrap.dedent(f""" import socket import time import threading @@ -1346,10 +1588,9 @@ def nested_func(): t2.start() t1.join() t2.join() - ''').strip() + """).strip() - # Code for second subinterpreter with 2 
threads - subinterp2_code = textwrap.dedent(f''' + subinterp2_code = textwrap.dedent(f""" import socket import time import threading @@ -1376,9 +1617,8 @@ def nested_func(): t2.start() t1.join() t2.join() - ''').strip() + """).strip() - # Pickle the subinterpreter codes pickled_code1 = pickle.dumps(subinterp1_code) pickled_code2 = pickle.dumps(subinterp2_code) @@ -1390,44 +1630,35 @@ def nested_func(): import socket import threading - # Connect to the test process sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) def main_worker(): - # Function running in main interpreter sock.sendall(b"ready:main\\n") time.sleep(10_000) def run_subinterp1(): - # Create and run first subinterpreter subinterp = interpreters.create() - import pickle pickled_code = {pickled_code1!r} subinterp_code = pickle.loads(pickled_code) subinterp.exec(subinterp_code) def run_subinterp2(): - # Create and run second subinterpreter subinterp = interpreters.create() - import pickle pickled_code = {pickled_code2!r} subinterp_code = pickle.loads(pickled_code) subinterp.exec(subinterp_code) - # Start subinterpreters in threads sub1_thread = threading.Thread(target=run_subinterp1) sub2_thread = threading.Thread(target=run_subinterp2) sub1_thread.start() sub2_thread.start() - # Start main thread work main_thread = threading.Thread(target=main_worker) main_thread.start() - # Keep main thread alive main_thread.join() sub1_thread.join() sub2_thread.join() @@ -1438,72 +1669,80 @@ def run_subinterp2(): script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(5) # Allow multiple connections - + server_socket = _create_server_socket(port, backlog=5) script_name = _make_test_script(script_dir, "script", script) client_sockets = [] + try: - p = subprocess.Popen([sys.executable, script_name]) + with _managed_subprocess([sys.executable, script_name]) as p: + # Accept connections from main and all subinterpreter threads + expected_responses = { + "ready:main", + "ready:sub1-t1", + "ready:sub1-t2", + "ready:sub2-t1", + "ready:sub2-t2", + } + responses = set() - # Accept connections from main and all subinterpreter threads - expected_responses = {"ready:main", "ready:sub1-t1", "ready:sub1-t2", "ready:sub2-t1", "ready:sub2-t2"} - responses = set() + while len(responses) < 5: + try: + client_socket, _ = server_socket.accept() + client_sockets.append(client_socket) + response = client_socket.recv(1024) + response_str = response.decode().strip() + if response_str in expected_responses: + responses.add(response_str) + except socket.timeout: + break + + server_socket.close() + server_socket = None - while len(responses) < 5: # Wait for all 5 ready signals try: - client_socket, _ = server_socket.accept() - client_sockets.append(client_socket) + stack_trace = get_stack_trace(p.pid) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) - # Read the response from this connection - response = client_socket.recv(1024) - response_str = response.decode().strip() - if response_str in expected_responses: - responses.add(response_str) - except socket.timeout: - break + # Verify we have multiple interpreters + self.assertGreaterEqual(len(stack_trace), 2) 
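+                    # (the main interpreter plus at least one of the two
+                    # subinterpreters started by the script)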
- server_socket.close() - stack_trace = get_stack_trace(p.pid) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) + # Count interpreters by ID + interpreter_ids = { + interp.interpreter_id for interp in stack_trace + } + self.assertIn( + 0, + interpreter_ids, + "Main interpreter should be present", + ) + self.assertGreaterEqual(len(interpreter_ids), 3) + + # Count total threads + total_threads = sum( + len(interp.threads) for interp in stack_trace + ) + self.assertGreaterEqual(total_threads, 5) + + # Look for expected function names + all_funcnames = set() + for interpreter_info in stack_trace: + for thread_info in interpreter_info.threads: + for frame in thread_info.frame_info: + all_funcnames.add(frame.funcname) + + expected_funcs = { + "main_worker", + "worker1", + "worker2", + "nested_func", + } + found_funcs = expected_funcs.intersection(all_funcnames) + self.assertGreater(len(found_funcs), 0) finally: - for client_socket in client_sockets: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) - - # Verify we have multiple interpreters - self.assertGreaterEqual(len(stack_trace), 2, "Should have at least two interpreters") - - # Count interpreters by ID - interpreter_ids = {interp.interpreter_id for interp in stack_trace} - self.assertIn(0, interpreter_ids, "Main interpreter should be present") - self.assertGreaterEqual(len(interpreter_ids), 3, "Should have main + at least 2 subinterpreters") - - # Count total threads across all interpreters - total_threads = sum(len(interp.threads) for interp in stack_trace) - self.assertGreaterEqual(total_threads, 5, "Should have at least 5 threads total") - - # Look for expected function names in stack traces - all_funcnames = set() - for interpreter_info in stack_trace: - for thread_info in interpreter_info.threads: - for frame in thread_info.frame_info: - all_funcnames.add(frame.funcname) - - # Should find functions from different interpreters and threads - expected_funcs = {"main_worker", "worker1", "worker2", "nested_func"} - found_funcs = expected_funcs.intersection(all_funcnames) - self.assertGreater(len(found_funcs), 0, f"Should find some expected functions, got: {all_funcnames}") + _cleanup_sockets(*client_sockets, server_socket) @skip_if_not_supported @unittest.skipIf( @@ -1512,54 +1751,41 @@ def run_subinterp2(): ) @requires_gil_enabled("Free threaded builds don't have an 'active thread'") def test_only_active_thread(self): - # Test that only_active_thread parameter works correctly port = find_unused_port() script = textwrap.dedent( f"""\ import time, sys, socket, threading - # Connect to the test process sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) def worker_thread(name, barrier, ready_event): - barrier.wait() # Synchronize thread start - ready_event.wait() # Wait for main thread signal - # Sleep to keep thread alive + barrier.wait() + ready_event.wait() time.sleep(10_000) def main_work(): - # Do busy work to hold the GIL sock.sendall(b"working\\n") count = 0 while count < 100000000: count += 1 if count % 10000000 == 0: - pass # Keep main thread busy + pass sock.sendall(b"done\\n") - # Create synchronization primitives num_threads = 3 - barrier = threading.Barrier(num_threads + 1) # +1 for main thread + barrier = threading.Barrier(num_threads + 1) ready_event = threading.Event() - # Start worker threads threads = [] for i in range(num_threads): t = 
threading.Thread(target=worker_thread, args=(f"Worker-{{i}}", barrier, ready_event)) t.start() threads.append(t) - # Wait for all threads to be ready barrier.wait() - - # Signal ready to parent process sock.sendall(b"ready\\n") - - # Signal threads to start waiting ready_event.set() - - # Now do busy work to hold the GIL main_work() """ ) @@ -1568,104 +1794,76 @@ def main_work(): script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) - + server_socket = _create_server_socket(port) script_name = _make_test_script(script_dir, "script", script) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None - # Wait for ready signal - response = b"" - while b"ready" not in response: - response += client_socket.recv(1024) + # Wait for ready and working signals + _wait_for_signal(client_socket, [b"ready", b"working"]) - # Wait for the main thread to start its busy work - while b"working" not in response: - response += client_socket.recv(1024) - - # Get stack trace with all threads - unwinder_all = RemoteUnwinder(p.pid, all_threads=True) - for _ in range(10): - # Wait for the main thread to start its busy work - all_traces = unwinder_all.get_stack_trace() - found = False - # New format: [InterpreterInfo(interpreter_id, [ThreadInfo(...)])] - for interpreter_info in all_traces: - for thread_info in interpreter_info.threads: - if not thread_info.frame_info: - continue - current_frame = thread_info.frame_info[0] - if ( - current_frame.funcname == "main_work" - and current_frame.lineno > 15 - ): - found = True + try: + # Get stack trace with all threads + unwinder_all = RemoteUnwinder(p.pid, all_threads=True) + for _ in range(MAX_TRIES): + all_traces = unwinder_all.get_stack_trace() + found = self._find_frame_in_trace( + all_traces, + lambda f: f.funcname == "main_work" + and f.lineno > 12, + ) + if found: break - if found: + time.sleep(0.1) + else: + self.fail( + "Main thread did not start its busy work on time" + ) + + # Get stack trace with only GIL holder + unwinder_gil = RemoteUnwinder( + p.pid, only_active_thread=True + ) + gil_traces = unwinder_gil.get_stack_trace() + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + # Count threads + total_threads = sum( + len(interp.threads) for interp in all_traces + ) + self.assertGreater(total_threads, 1) + + total_gil_threads = sum( + len(interp.threads) for interp in gil_traces + ) + self.assertEqual(total_gil_threads, 1) + + # Get the GIL holder thread ID + gil_thread_id = None + for interpreter_info in gil_traces: + if interpreter_info.threads: + gil_thread_id = interpreter_info.threads[ + 0 + ].thread_id break - if found: - break - # Give a bit of time to take the next sample - time.sleep(0.1) - else: - self.fail( - "Main thread did not start its busy work on time" - ) + # Get all thread IDs + all_thread_ids = [] + for interpreter_info in all_traces: + for thread_info in interpreter_info.threads: + 
all_thread_ids.append(thread_info.thread_id) - # Get stack trace with only GIL holder - unwinder_gil = RemoteUnwinder(p.pid, only_active_thread=True) - gil_traces = unwinder_gil.get_stack_trace() - - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" - ) + self.assertIn(gil_thread_id, all_thread_ids) finally: - if client_socket is not None: - client_socket.close() - p.kill() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) - - # Count total threads across all interpreters in all_traces - total_threads = sum(len(interpreter_info.threads) for interpreter_info in all_traces) - self.assertGreater( - total_threads, 1, "Should have multiple threads" - ) - - # Count total threads across all interpreters in gil_traces - total_gil_threads = sum(len(interpreter_info.threads) for interpreter_info in gil_traces) - self.assertEqual( - total_gil_threads, 1, "Should have exactly one GIL holder" - ) - - # Get the GIL holder thread ID - gil_thread_id = None - for interpreter_info in gil_traces: - if interpreter_info.threads: - gil_thread_id = interpreter_info.threads[0].thread_id - break - - # Get all thread IDs from all_traces - all_thread_ids = [] - for interpreter_info in all_traces: - for thread_info in interpreter_info.threads: - all_thread_ids.append(thread_info.thread_id) - - self.assertIn( - gil_thread_id, - all_thread_ids, - "GIL holder should be among all threads", - ) + _cleanup_sockets(client_socket, server_socket) class TestUnsupportedPlatformHandling(unittest.TestCase): @@ -1673,23 +1871,28 @@ class TestUnsupportedPlatformHandling(unittest.TestCase): sys.platform in ("linux", "darwin", "win32"), "Test only runs on unsupported platforms (not Linux, macOS, or Windows)", ) - @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) def test_unsupported_platform_error(self): with self.assertRaises(RuntimeError) as cm: RemoteUnwinder(os.getpid()) self.assertIn( "Reading the PyRuntime section is not supported on this platform", - str(cm.exception) + str(cm.exception), ) -class TestDetectionOfThreadStatus(unittest.TestCase): - @unittest.skipIf( - sys.platform not in ("linux", "darwin", "win32"), - "Test only runs on unsupported platforms (not Linux, macOS, or Windows)", - ) - @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") - def test_thread_status_detection(self): + +class TestDetectionOfThreadStatus(RemoteInspectionTestBase): + def _run_thread_status_test(self, mode, check_condition): + """ + Common pattern for thread status detection tests. + + Args: + mode: Profiling mode (PROFILING_MODE_CPU, PROFILING_MODE_GIL, etc.) 
+ check_condition: Function(statuses, sleeper_tid, busy_tid) -> bool + """ port = find_unused_port() script = textwrap.dedent( f"""\ @@ -1722,203 +1925,146 @@ def busy(): sock.close() """ ) + with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) - script_name = _make_test_script(script_dir, "thread_status_script", script) + server_socket = _create_server_socket(port) + script_name = _make_test_script( + script_dir, "thread_status_script", script + ) client_socket = None + try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = b"" - sleeper_tid = None - busy_tid = None - while True: - chunk = client_socket.recv(1024) - response += chunk - if b"ready:main" in response and b"ready:sleeper" in response and b"ready:busy" in response: - # Parse TIDs from the response - for line in response.split(b"\n"): - if line.startswith(b"ready:sleeper:"): - try: - sleeper_tid = int(line.split(b":")[-1]) - except Exception: - pass - elif line.startswith(b"ready:busy:"): - try: - busy_tid = int(line.split(b":")[-1]) - except Exception: - pass - break + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None - attempts = 10 - statuses = {} - try: - unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_CPU, - skip_non_matching_threads=False) - for _ in range(attempts): - traces = unwinder.get_stack_trace() - # Find threads and their statuses - statuses = {} - for interpreter_info in traces: - for thread_info in interpreter_info.threads: - statuses[thread_info.thread_id] = thread_info.status - - # Check if sleeper thread is off CPU and busy thread is on CPU - # In the new flags system: - # - sleeper should NOT have ON_CPU flag (off CPU) - # - busy should have ON_CPU flag - if (sleeper_tid in statuses and - busy_tid in statuses and - not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and - (statuses[busy_tid] & THREAD_STATUS_ON_CPU)): - break - time.sleep(0.5) # Give a bit of time to let threads settle - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" + # Wait for all ready signals and parse TIDs + response = _wait_for_signal( + client_socket, + [b"ready:main", b"ready:sleeper", b"ready:busy"], ) - self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") - self.assertIsNotNone(busy_tid, "Busy thread id not received") - self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") - self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper thread should be off CPU") - self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy thread should be on CPU") + sleeper_tid = None + busy_tid = None + for line in response.split(b"\n"): + if line.startswith(b"ready:sleeper:"): + try: + sleeper_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass + elif line.startswith(b"ready:busy:"): + try: + busy_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass - finally: - if client_socket is not None: - client_socket.close() - 
p.terminate() - p.wait(timeout=SHORT_TIMEOUT) - - @unittest.skipIf( - sys.platform not in ("linux", "darwin", "win32"), - "Test only runs on unsupported platforms (not Linux, macOS, or Windows)", - ) - @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") - def test_thread_status_gil_detection(self): - port = find_unused_port() - script = textwrap.dedent( - f"""\ - import time, sys, socket, threading - import os - - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.connect(('localhost', {port})) - - def sleeper(): - tid = threading.get_native_id() - sock.sendall(f'ready:sleeper:{{tid}}\\n'.encode()) - time.sleep(10000) - - def busy(): - tid = threading.get_native_id() - sock.sendall(f'ready:busy:{{tid}}\\n'.encode()) - x = 0 - while True: - x = x + 1 - time.sleep(0.5) - - t1 = threading.Thread(target=sleeper) - t2 = threading.Thread(target=busy) - t1.start() - t2.start() - sock.sendall(b'ready:main\\n') - t1.join() - t2.join() - sock.close() - """ - ) - with os_helper.temp_dir() as work_dir: - script_dir = os.path.join(work_dir, "script_pkg") - os.mkdir(script_dir) - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.settimeout(SHORT_TIMEOUT) - server_socket.listen(1) - - script_name = _make_test_script(script_dir, "thread_status_script", script) - client_socket = None - try: - p = subprocess.Popen([sys.executable, script_name]) - client_socket, _ = server_socket.accept() - server_socket.close() - response = b"" - sleeper_tid = None - busy_tid = None - while True: - chunk = client_socket.recv(1024) - response += chunk - if b"ready:main" in response and b"ready:sleeper" in response and b"ready:busy" in response: - # Parse TIDs from the response - for line in response.split(b"\n"): - if line.startswith(b"ready:sleeper:"): - try: - sleeper_tid = int(line.split(b":")[-1]) - except Exception: - pass - elif line.startswith(b"ready:busy:"): - try: - busy_tid = int(line.split(b":")[-1]) - except Exception: - pass - break - - attempts = 10 - statuses = {} - try: - unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_GIL, - skip_non_matching_threads=False) - for _ in range(attempts): - traces = unwinder.get_stack_trace() - # Find threads and their statuses - statuses = {} - for interpreter_info in traces: - for thread_info in interpreter_info.threads: - statuses[thread_info.thread_id] = thread_info.status - - # Check if sleeper thread doesn't have GIL and busy thread has GIL - # In the new flags system: - # - sleeper should NOT have HAS_GIL flag (waiting for GIL) - # - busy should have HAS_GIL flag - if (sleeper_tid in statuses and - busy_tid in statuses and - not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and - (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)): - break - time.sleep(0.5) # Give a bit of time to let threads settle - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" + self.assertIsNotNone( + sleeper_tid, "Sleeper thread id not received" + ) + self.assertIsNotNone( + busy_tid, "Busy thread id not received" ) - self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") - self.assertIsNotNone(busy_tid, "Busy thread id not received") - self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") - self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - self.assertFalse(statuses[sleeper_tid] 
& THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL") - self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy thread should have GIL") + # Sample until we see expected thread states + statuses = {} + try: + unwinder = RemoteUnwinder( + p.pid, + all_threads=True, + mode=mode, + skip_non_matching_threads=False, + ) + for _ in range(MAX_TRIES): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) + if check_condition( + statuses, sleeper_tid, busy_tid + ): + break + time.sleep(0.5) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + return statuses, sleeper_tid, busy_tid finally: - if client_socket is not None: - client_socket.close() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) + _cleanup_sockets(client_socket, server_socket) @unittest.skipIf( sys.platform not in ("linux", "darwin", "win32"), "Test only runs on supported platforms (Linux, macOS, or Windows)", ) - @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_thread_status_detection(self): + def check_cpu_status(statuses, sleeper_tid, busy_tid): + return ( + sleeper_tid in statuses + and busy_tid in statuses + and not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) + and (statuses[busy_tid] & THREAD_STATUS_ON_CPU) + ) + + statuses, sleeper_tid, busy_tid = self._run_thread_status_test( + PROFILING_MODE_CPU, check_cpu_status + ) + + self.assertIn(sleeper_tid, statuses) + self.assertIn(busy_tid, statuses) + self.assertFalse( + statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, + "Sleeper thread should be off CPU", + ) + self.assertTrue( + statuses[busy_tid] & THREAD_STATUS_ON_CPU, + "Busy thread should be on CPU", + ) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) + def test_thread_status_gil_detection(self): + def check_gil_status(statuses, sleeper_tid, busy_tid): + return ( + sleeper_tid in statuses + and busy_tid in statuses + and not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) + and (statuses[busy_tid] & THREAD_STATUS_HAS_GIL) + ) + + statuses, sleeper_tid, busy_tid = self._run_thread_status_test( + PROFILING_MODE_GIL, check_gil_status + ) + + self.assertIn(sleeper_tid, statuses) + self.assertIn(busy_tid, statuses) + self.assertFalse( + statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, + "Sleeper thread should not have GIL", + ) + self.assertTrue( + statuses[busy_tid] & THREAD_STATUS_HAS_GIL, + "Busy thread should have GIL", + ) + + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf( + sys.platform == "android", "Android raises Linux-specific exception" + ) def test_thread_status_all_mode_detection(self): port = find_unused_port() script = textwrap.dedent( @@ -1951,91 +2097,967 @@ def busy_thread(): with os_helper.temp_dir() as tmp_dir: script_file = make_script(tmp_dir, "script", script) - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(("localhost", port)) - server_socket.listen(2) - server_socket.settimeout(SHORT_TIMEOUT) - - p = subprocess.Popen( - [sys.executable, script_file], - 
stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - + server_socket = _create_server_socket(port, backlog=2) client_sockets = [] + try: - sleeper_tid = None - busy_tid = None + with _managed_subprocess( + [sys.executable, script_file], + ) as p: + sleeper_tid = None + busy_tid = None - # Receive thread IDs from the child process - for _ in range(2): - client_socket, _ = server_socket.accept() - client_sockets.append(client_socket) - line = client_socket.recv(1024) - if line: - if line.startswith(b"sleeper:"): - try: - sleeper_tid = int(line.split(b":")[-1]) - except Exception: - pass - elif line.startswith(b"busy:"): - try: - busy_tid = int(line.split(b":")[-1]) - except Exception: - pass + # Receive thread IDs from the child process + for _ in range(2): + client_socket, _ = server_socket.accept() + client_sockets.append(client_socket) + line = client_socket.recv(1024) + if line: + if line.startswith(b"sleeper:"): + try: + sleeper_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass + elif line.startswith(b"busy:"): + try: + busy_tid = int(line.split(b":")[-1]) + except (ValueError, IndexError): + pass - server_socket.close() + server_socket.close() + server_socket = None - attempts = 10 - statuses = {} - try: - unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_ALL, - skip_non_matching_threads=False) - for _ in range(attempts): - traces = unwinder.get_stack_trace() - # Find threads and their statuses - statuses = {} - for interpreter_info in traces: - for thread_info in interpreter_info.threads: - statuses[thread_info.thread_id] = thread_info.status + statuses = {} + try: + unwinder = RemoteUnwinder( + p.pid, + all_threads=True, + mode=PROFILING_MODE_ALL, + skip_non_matching_threads=False, + ) + for _ in range(MAX_TRIES): + traces = unwinder.get_stack_trace() + statuses = self._get_thread_statuses(traces) - # Check ALL mode provides both GIL and CPU info - # - sleeper should NOT have ON_CPU and NOT have HAS_GIL - # - busy should have ON_CPU and have HAS_GIL - if (sleeper_tid in statuses and - busy_tid in statuses and - not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and - not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and - (statuses[busy_tid] & THREAD_STATUS_ON_CPU) and - (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)): - break - time.sleep(0.5) - except PermissionError: - self.skipTest( - "Insufficient permissions to read the stack trace" + # Check ALL mode provides both GIL and CPU info + if ( + sleeper_tid in statuses + and busy_tid in statuses + and not ( + statuses[sleeper_tid] + & THREAD_STATUS_ON_CPU + ) + and not ( + statuses[sleeper_tid] + & THREAD_STATUS_HAS_GIL + ) + and (statuses[busy_tid] & THREAD_STATUS_ON_CPU) + and ( + statuses[busy_tid] & THREAD_STATUS_HAS_GIL + ) + ): + break + time.sleep(0.5) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + self.assertIsNotNone( + sleeper_tid, "Sleeper thread id not received" + ) + self.assertIsNotNone( + busy_tid, "Busy thread id not received" + ) + self.assertIn(sleeper_tid, statuses) + self.assertIn(busy_tid, statuses) + + # Sleeper: off CPU, no GIL + self.assertFalse( + statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, + "Sleeper should be off CPU", + ) + self.assertFalse( + statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, + "Sleeper should not have GIL", ) - self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") - self.assertIsNotNone(busy_tid, "Busy thread id not received") - self.assertIn(sleeper_tid, statuses, 
"Sleeper tid not found in sampled threads") - self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - - # Sleeper thread: off CPU, no GIL - self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper should be off CPU") - self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL") - - # Busy thread: on CPU, has GIL - self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy should be on CPU") - self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy should have GIL") - + # Busy: on CPU, has GIL + self.assertTrue( + statuses[busy_tid] & THREAD_STATUS_ON_CPU, + "Busy should be on CPU", + ) + self.assertTrue( + statuses[busy_tid] & THREAD_STATUS_HAS_GIL, + "Busy should have GIL", + ) finally: - for client_socket in client_sockets: - client_socket.close() - p.terminate() - p.wait(timeout=SHORT_TIMEOUT) - p.stdout.close() - p.stderr.close() + _cleanup_sockets(*client_sockets, server_socket) + + +class TestFrameCaching(RemoteInspectionTestBase): + """Test that frame caching produces correct results. + + Uses socket-based synchronization for deterministic testing. + All tests verify cache reuse via object identity checks (assertIs). + """ + + @contextmanager + def _target_process(self, script_body): + """Context manager for running a target process with socket sync.""" + port = find_unused_port() + script = f"""\ +import socket +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +sock.connect(('localhost', {port})) +{textwrap.dedent(script_body)} +""" + + with os_helper.temp_dir() as work_dir: + script_dir = os.path.join(work_dir, "script_pkg") + os.mkdir(script_dir) + + server_socket = _create_server_socket(port) + script_name = _make_test_script(script_dir, "script", script) + client_socket = None + + try: + with _managed_subprocess([sys.executable, script_name]) as p: + client_socket, _ = server_socket.accept() + server_socket.close() + server_socket = None + + def make_unwinder(cache_frames=True): + return RemoteUnwinder( + p.pid, all_threads=True, cache_frames=cache_frames + ) + + yield p, client_socket, make_unwinder + + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + finally: + _cleanup_sockets(client_socket, server_socket) + + def _get_frames_with_retry(self, unwinder, required_funcs): + """Get frames containing required_funcs, with retry for transient errors.""" + for _ in range(MAX_TRIES): + with contextlib.suppress(OSError, RuntimeError): + traces = unwinder.get_stack_trace() + for interp in traces: + for thread in interp.threads: + funcs = {f.funcname for f in thread.frame_info} + if required_funcs.issubset(funcs): + return thread.frame_info + time.sleep(0.1) + return None + + def _sample_frames( + self, + client_socket, + unwinder, + wait_signal, + send_ack, + required_funcs, + expected_frames=1, + ): + """Wait for signal, sample frames with retry until required funcs present, send ack.""" + _wait_for_signal(client_socket, wait_signal) + frames = None + for _ in range(MAX_TRIES): + frames = self._get_frames_with_retry(unwinder, required_funcs) + if frames and len(frames) >= expected_frames: + break + time.sleep(0.1) + client_socket.sendall(send_ack) + return frames + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_cache_hit_same_stack(self): + """Test that consecutive samples reuse cached parent frame objects. 
+ + The current frame (index 0) is always re-read from memory to get + updated line numbers, so it may be a different object. Parent frames + (index 1+) should be identical objects from cache. + """ + script_body = """\ + def level3(): + sock.sendall(b"sync1") + sock.recv(16) + sock.sendall(b"sync2") + sock.recv(16) + sock.sendall(b"sync3") + sock.recv(16) + + def level2(): + level3() + + def level1(): + level2() + + level1() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + expected = {"level1", "level2", "level3"} + + frames1 = self._sample_frames( + client_socket, unwinder, b"sync1", b"ack", expected + ) + frames2 = self._sample_frames( + client_socket, unwinder, b"sync2", b"ack", expected + ) + frames3 = self._sample_frames( + client_socket, unwinder, b"sync3", b"done", expected + ) + + self.assertIsNotNone(frames1) + self.assertIsNotNone(frames2) + self.assertIsNotNone(frames3) + self.assertEqual(len(frames1), len(frames2)) + self.assertEqual(len(frames2), len(frames3)) + + # Current frame (index 0) is always re-read, so check value equality + self.assertEqual(frames1[0].funcname, frames2[0].funcname) + self.assertEqual(frames2[0].funcname, frames3[0].funcname) + + # Parent frames (index 1+) must be identical objects (cache reuse) + for i in range(1, len(frames1)): + f1, f2, f3 = frames1[i], frames2[i], frames3[i] + self.assertIs( + f1, f2, f"Frame {i}: samples 1-2 must be same object" + ) + self.assertIs( + f2, f3, f"Frame {i}: samples 2-3 must be same object" + ) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_line_number_updates_in_same_frame(self): + """Test that line numbers are correctly updated when execution moves within a function. + + When the profiler samples at different points within the same function, + it must report the correct line number for each sample, not stale cached values. 
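+        The unwinder achieves this by always re-reading the current
+        frame (index 0) from the target process, even when parent
+        frames (index 1+) are served from the per-thread cache.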
+ """ + script_body = """\ + def outer(): + inner() + + def inner(): + sock.sendall(b"line_a"); sock.recv(16) + sock.sendall(b"line_b"); sock.recv(16) + sock.sendall(b"line_c"); sock.recv(16) + sock.sendall(b"line_d"); sock.recv(16) + + outer() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + + frames_a = self._sample_frames( + client_socket, unwinder, b"line_a", b"ack", {"inner"} + ) + frames_b = self._sample_frames( + client_socket, unwinder, b"line_b", b"ack", {"inner"} + ) + frames_c = self._sample_frames( + client_socket, unwinder, b"line_c", b"ack", {"inner"} + ) + frames_d = self._sample_frames( + client_socket, unwinder, b"line_d", b"done", {"inner"} + ) + + self.assertIsNotNone(frames_a) + self.assertIsNotNone(frames_b) + self.assertIsNotNone(frames_c) + self.assertIsNotNone(frames_d) + + # Get the 'inner' frame from each sample (should be index 0) + inner_a = frames_a[0] + inner_b = frames_b[0] + inner_c = frames_c[0] + inner_d = frames_d[0] + + self.assertEqual(inner_a.funcname, "inner") + self.assertEqual(inner_b.funcname, "inner") + self.assertEqual(inner_c.funcname, "inner") + self.assertEqual(inner_d.funcname, "inner") + + # Line numbers must be different and increasing (execution moves forward) + self.assertLess( + inner_a.lineno, inner_b.lineno, "Line B should be after line A" + ) + self.assertLess( + inner_b.lineno, inner_c.lineno, "Line C should be after line B" + ) + self.assertLess( + inner_c.lineno, inner_d.lineno, "Line D should be after line C" + ) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_cache_invalidation_on_return(self): + """Test cache invalidation when stack shrinks (function returns).""" + script_body = """\ + def inner(): + sock.sendall(b"at_inner") + sock.recv(16) + + def outer(): + inner() + sock.sendall(b"at_outer") + sock.recv(16) + + outer() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + + frames_deep = self._sample_frames( + client_socket, + unwinder, + b"at_inner", + b"ack", + {"inner", "outer"}, + ) + frames_shallow = self._sample_frames( + client_socket, unwinder, b"at_outer", b"done", {"outer"} + ) + + self.assertIsNotNone(frames_deep) + self.assertIsNotNone(frames_shallow) + + funcs_deep = [f.funcname for f in frames_deep] + funcs_shallow = [f.funcname for f in frames_shallow] + + self.assertIn("inner", funcs_deep) + self.assertIn("outer", funcs_deep) + self.assertNotIn("inner", funcs_shallow) + self.assertIn("outer", funcs_shallow) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_cache_invalidation_on_call(self): + """Test cache invalidation when stack grows (new function called).""" + script_body = """\ + def deeper(): + sock.sendall(b"at_deeper") + sock.recv(16) + + def middle(): + sock.sendall(b"at_middle") + sock.recv(16) + deeper() + + def top(): + middle() + + top() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + + frames_before = self._sample_frames( + client_socket, + unwinder, + b"at_middle", + b"ack", + {"middle", "top"}, + ) + frames_after = self._sample_frames( + 
client_socket, + unwinder, + b"at_deeper", + b"done", + {"deeper", "middle", "top"}, + ) + + self.assertIsNotNone(frames_before) + self.assertIsNotNone(frames_after) + + funcs_before = [f.funcname for f in frames_before] + funcs_after = [f.funcname for f in frames_after] + + self.assertIn("middle", funcs_before) + self.assertIn("top", funcs_before) + self.assertNotIn("deeper", funcs_before) + + self.assertIn("deeper", funcs_after) + self.assertIn("middle", funcs_after) + self.assertIn("top", funcs_after) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_partial_stack_reuse(self): + """Test that unchanged bottom frames are reused when top changes (A→B→C to A→B→D).""" + script_body = """\ + def func_c(): + sock.sendall(b"at_c") + sock.recv(16) + + def func_d(): + sock.sendall(b"at_d") + sock.recv(16) + + def func_b(): + func_c() + func_d() + + def func_a(): + func_b() + + func_a() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + + # Sample at C: stack is A→B→C + frames_c = self._sample_frames( + client_socket, + unwinder, + b"at_c", + b"ack", + {"func_a", "func_b", "func_c"}, + ) + # Sample at D: stack is A→B→D (C returned, D called) + frames_d = self._sample_frames( + client_socket, + unwinder, + b"at_d", + b"done", + {"func_a", "func_b", "func_d"}, + ) + + self.assertIsNotNone(frames_c) + self.assertIsNotNone(frames_d) + + # Find func_a and func_b frames in both samples + def find_frame(frames, funcname): + for f in frames: + if f.funcname == funcname: + return f + return None + + frame_a_in_c = find_frame(frames_c, "func_a") + frame_b_in_c = find_frame(frames_c, "func_b") + frame_a_in_d = find_frame(frames_d, "func_a") + frame_b_in_d = find_frame(frames_d, "func_b") + + self.assertIsNotNone(frame_a_in_c) + self.assertIsNotNone(frame_b_in_c) + self.assertIsNotNone(frame_a_in_d) + self.assertIsNotNone(frame_b_in_d) + + # The bottom frames (A, B) should be the SAME objects (cache reuse) + self.assertIs( + frame_a_in_c, + frame_a_in_d, + "func_a frame should be reused from cache", + ) + self.assertIs( + frame_b_in_c, + frame_b_in_d, + "func_b frame should be reused from cache", + ) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_recursive_frames(self): + """Test caching with same function appearing multiple times (recursion).""" + script_body = """\ + def recurse(n): + if n <= 0: + sock.sendall(b"sync1") + sock.recv(16) + sock.sendall(b"sync2") + sock.recv(16) + else: + recurse(n - 1) + + recurse(5) + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + + frames1 = self._sample_frames( + client_socket, unwinder, b"sync1", b"ack", {"recurse"} + ) + frames2 = self._sample_frames( + client_socket, unwinder, b"sync2", b"done", {"recurse"} + ) + + self.assertIsNotNone(frames1) + self.assertIsNotNone(frames2) + + # Should have multiple "recurse" frames (6 total: recurse(5) down to recurse(0)) + recurse_count = sum(1 for f in frames1 if f.funcname == "recurse") + self.assertEqual(recurse_count, 6, "Should have 6 recursive frames") + + self.assertEqual(len(frames1), len(frames2)) + + # Current frame (index 0) is re-read, check value equality + 
self.assertEqual(frames1[0].funcname, frames2[0].funcname) + + # Parent frames (index 1+) should be identical objects (cache reuse) + for i in range(1, len(frames1)): + self.assertIs( + frames1[i], + frames2[i], + f"Frame {i}: recursive frames must be same object", + ) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_cache_vs_no_cache_equivalence(self): + """Test that cache_frames=True and cache_frames=False produce equivalent results.""" + script_body = """\ + def level3(): + sock.sendall(b"ready"); sock.recv(16) + + def level2(): + level3() + + def level1(): + level2() + + level1() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + _wait_for_signal(client_socket, b"ready") + + # Sample with cache + unwinder_cache = make_unwinder(cache_frames=True) + frames_cached = self._get_frames_with_retry( + unwinder_cache, {"level1", "level2", "level3"} + ) + + # Sample without cache + unwinder_no_cache = make_unwinder(cache_frames=False) + frames_no_cache = self._get_frames_with_retry( + unwinder_no_cache, {"level1", "level2", "level3"} + ) + + client_socket.sendall(b"done") + + self.assertIsNotNone(frames_cached) + self.assertIsNotNone(frames_no_cache) + + # Same number of frames + self.assertEqual(len(frames_cached), len(frames_no_cache)) + + # Same function names in same order + funcs_cached = [f.funcname for f in frames_cached] + funcs_no_cache = [f.funcname for f in frames_no_cache] + self.assertEqual(funcs_cached, funcs_no_cache) + + # Same line numbers + lines_cached = [f.lineno for f in frames_cached] + lines_no_cache = [f.lineno for f in frames_no_cache] + self.assertEqual(lines_cached, lines_no_cache) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_cache_per_thread_isolation(self): + """Test that frame cache is per-thread and cache invalidation works independently.""" + script_body = """\ + import threading + + lock = threading.Lock() + + def sync(msg): + with lock: + sock.sendall(msg + b"\\n") + sock.recv(1) + + # Thread 1 functions + def baz1(): + sync(b"t1:baz1") + + def bar1(): + baz1() + + def blech1(): + sync(b"t1:blech1") + + def foo1(): + bar1() # Goes down to baz1, syncs + blech1() # Returns up, goes down to blech1, syncs + + # Thread 2 functions + def baz2(): + sync(b"t2:baz2") + + def bar2(): + baz2() + + def blech2(): + sync(b"t2:blech2") + + def foo2(): + bar2() # Goes down to baz2, syncs + blech2() # Returns up, goes down to blech2, syncs + + t1 = threading.Thread(target=foo1) + t2 = threading.Thread(target=foo2) + t1.start() + t2.start() + t1.join() + t2.join() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder = make_unwinder(cache_frames=True) + + # Message dispatch table: signal -> required functions for that thread + dispatch = { + b"t1:baz1": {"baz1", "bar1", "foo1"}, + b"t2:baz2": {"baz2", "bar2", "foo2"}, + b"t1:blech1": {"blech1", "foo1"}, + b"t2:blech2": {"blech2", "foo2"}, + } + + # Track results for each sync point + results = {} + + # Process 4 sync points (order depends on thread scheduling) + buffer = _wait_for_signal(client_socket, b"\n") + for i in range(4): + # Extract first message from buffer + msg, sep, buffer = buffer.partition(b"\n") + self.assertIn(msg, dispatch, f"Unexpected 
message: {msg!r}") + + # Sample frames for the thread at this sync point + required_funcs = dispatch[msg] + frames = self._get_frames_with_retry(unwinder, required_funcs) + self.assertIsNotNone(frames, f"Thread not found for {msg!r}") + results[msg] = [f.funcname for f in frames] + + # Release thread and wait for next message (if not last) + client_socket.sendall(b"k") + if i < 3: + buffer += _wait_for_signal(client_socket, b"\n") + + # Validate Phase 1: baz snapshots + t1_baz = results.get(b"t1:baz1") + t2_baz = results.get(b"t2:baz2") + self.assertIsNotNone(t1_baz, "Missing t1:baz1 snapshot") + self.assertIsNotNone(t2_baz, "Missing t2:baz2 snapshot") + + # Thread 1 at baz1: should have foo1->bar1->baz1 + self.assertIn("baz1", t1_baz) + self.assertIn("bar1", t1_baz) + self.assertIn("foo1", t1_baz) + self.assertNotIn("blech1", t1_baz) + # No cross-contamination + self.assertNotIn("baz2", t1_baz) + self.assertNotIn("bar2", t1_baz) + self.assertNotIn("foo2", t1_baz) + + # Thread 2 at baz2: should have foo2->bar2->baz2 + self.assertIn("baz2", t2_baz) + self.assertIn("bar2", t2_baz) + self.assertIn("foo2", t2_baz) + self.assertNotIn("blech2", t2_baz) + # No cross-contamination + self.assertNotIn("baz1", t2_baz) + self.assertNotIn("bar1", t2_baz) + self.assertNotIn("foo1", t2_baz) + + # Validate Phase 2: blech snapshots (cache invalidation test) + t1_blech = results.get(b"t1:blech1") + t2_blech = results.get(b"t2:blech2") + self.assertIsNotNone(t1_blech, "Missing t1:blech1 snapshot") + self.assertIsNotNone(t2_blech, "Missing t2:blech2 snapshot") + + # Thread 1 at blech1: bar1/baz1 should be GONE (cache invalidated) + self.assertIn("blech1", t1_blech) + self.assertIn("foo1", t1_blech) + self.assertNotIn( + "bar1", t1_blech, "Cache not invalidated: bar1 still present" + ) + self.assertNotIn( + "baz1", t1_blech, "Cache not invalidated: baz1 still present" + ) + # No cross-contamination + self.assertNotIn("blech2", t1_blech) + + # Thread 2 at blech2: bar2/baz2 should be GONE (cache invalidated) + self.assertIn("blech2", t2_blech) + self.assertIn("foo2", t2_blech) + self.assertNotIn( + "bar2", t2_blech, "Cache not invalidated: bar2 still present" + ) + self.assertNotIn( + "baz2", t2_blech, "Cache not invalidated: baz2 still present" + ) + # No cross-contamination + self.assertNotIn("blech1", t2_blech) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_new_unwinder_with_stale_last_profiled_frame(self): + """Test that a new unwinder returns complete stack when cache lookup misses.""" + script_body = """\ + def level4(): + sock.sendall(b"sync1") + sock.recv(16) + sock.sendall(b"sync2") + sock.recv(16) + + def level3(): + level4() + + def level2(): + level3() + + def level1(): + level2() + + level1() + """ + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + expected = {"level1", "level2", "level3", "level4"} + + # First unwinder samples - this sets last_profiled_frame in target + unwinder1 = make_unwinder(cache_frames=True) + frames1 = self._sample_frames( + client_socket, unwinder1, b"sync1", b"ack", expected + ) + + # Create NEW unwinder (empty cache) and sample + # The target still has last_profiled_frame set from unwinder1 + unwinder2 = make_unwinder(cache_frames=True) + frames2 = self._sample_frames( + client_socket, unwinder2, b"sync2", b"done", expected + ) + + self.assertIsNotNone(frames1) + self.assertIsNotNone(frames2) + + 
funcs1 = [f.funcname for f in frames1] + funcs2 = [f.funcname for f in frames2] + + # Both should have all levels + for level in ["level1", "level2", "level3", "level4"]: + self.assertIn(level, funcs1, f"{level} missing from first sample") + self.assertIn(level, funcs2, f"{level} missing from second sample") + + # Should have same stack depth + self.assertEqual( + len(frames1), + len(frames2), + "New unwinder should return complete stack despite stale last_profiled_frame", + ) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_cache_exhaustion(self): + """Test cache works when frame limit (1024) is exceeded. + + FRAME_CACHE_MAX_FRAMES=1024. With 1100 recursive frames, + the cache can't store all of them but should still work. + """ + # Use 1100 to exceed FRAME_CACHE_MAX_FRAMES=1024 + depth = 1100 + script_body = f"""\ +import sys +sys.setrecursionlimit(2000) + +def recurse(n): + if n <= 0: + sock.sendall(b"ready") + sock.recv(16) # wait for ack + sock.sendall(b"ready2") + sock.recv(16) # wait for done + return + recurse(n - 1) + +recurse({depth}) +""" + + with self._target_process(script_body) as ( + p, + client_socket, + make_unwinder, + ): + unwinder_cache = make_unwinder(cache_frames=True) + unwinder_no_cache = make_unwinder(cache_frames=False) + + frames_cached = self._sample_frames( + client_socket, + unwinder_cache, + b"ready", + b"ack", + {"recurse"}, + expected_frames=1102, + ) + # Sample again with no cache for comparison + frames_no_cache = self._sample_frames( + client_socket, + unwinder_no_cache, + b"ready2", + b"done", + {"recurse"}, + expected_frames=1102, + ) + + self.assertIsNotNone(frames_cached) + self.assertIsNotNone(frames_no_cache) + + # Both should have many recurse frames (> 1024 limit) + cached_count = [f.funcname for f in frames_cached].count("recurse") + no_cache_count = [f.funcname for f in frames_no_cache].count("recurse") + + self.assertGreater( + cached_count, 1000, "Should have >1000 recurse frames" + ) + self.assertGreater( + no_cache_count, 1000, "Should have >1000 recurse frames" + ) + + # Both modes should produce same frame count + self.assertEqual( + len(frames_cached), + len(frames_no_cache), + "Cache exhaustion should not affect stack completeness", + ) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_get_stats(self): + """Test that get_stats() returns statistics when stats=True.""" + script_body = """\ + sock.sendall(b"ready") + sock.recv(16) + """ + + with self._target_process(script_body) as (p, client_socket, _): + unwinder = RemoteUnwinder(p.pid, all_threads=True, stats=True) + _wait_for_signal(client_socket, b"ready") + + # Take a sample + unwinder.get_stack_trace() + + stats = unwinder.get_stats() + client_socket.sendall(b"done") + + # Verify expected keys exist + expected_keys = [ + "total_samples", + "frame_cache_hits", + "frame_cache_misses", + "frame_cache_partial_hits", + "frames_read_from_cache", + "frames_read_from_memory", + "frame_cache_hit_rate", + ] + for key in expected_keys: + self.assertIn(key, stats) + + self.assertEqual(stats["total_samples"], 1) + + @skip_if_not_supported + @unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", + ) + def test_get_stats_disabled_raises(self): + """Test 
that get_stats() raises RuntimeError when stats=False.""" + script_body = """\ + sock.sendall(b"ready") + sock.recv(16) + """ + + with self._target_process(script_body) as (p, client_socket, _): + unwinder = RemoteUnwinder( + p.pid, all_threads=True + ) # stats=False by default + _wait_for_signal(client_socket, b"ready") + + with self.assertRaises(RuntimeError): + unwinder.get_stats() + + client_socket.sendall(b"done") if __name__ == "__main__": diff --git a/Lib/test/test_free_threading/test_capi.py b/Lib/test/test_free_threading/test_capi.py new file mode 100644 index 00000000000..146d7cfc97a --- /dev/null +++ b/Lib/test/test_free_threading/test_capi.py @@ -0,0 +1,47 @@ +import ctypes +import sys +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +_PyImport_AddModuleRef = ctypes.pythonapi.PyImport_AddModuleRef +_PyImport_AddModuleRef.argtypes = (ctypes.c_char_p,) +_PyImport_AddModuleRef.restype = ctypes.py_object + + +@threading_helper.requires_working_threading() +class TestImportCAPI(unittest.TestCase): + def test_pyimport_addmoduleref_thread_safe(self): + # gh-137422: Concurrent calls to PyImport_AddModuleRef with the same + # module name must return the same module object. + + NUM_ITERS = 10 + NTHREADS = 4 + + module_name = f"test_free_threading_addmoduleref_{id(self)}" + module_name_bytes = module_name.encode() + sys.modules.pop(module_name, None) + results = [] + + def worker(): + module = _PyImport_AddModuleRef(module_name_bytes) + results.append(module) + + for _ in range(NUM_ITERS): + try: + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(len(results), NTHREADS) + reference = results[0] + for module in results[1:]: + self.assertIs(module, reference) + self.assertIn(module_name, sys.modules) + self.assertIs(sys.modules[module_name], reference) + finally: + results.clear() + sys.modules.pop(module_name, None) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_csv.py b/Lib/test/test_free_threading/test_csv.py new file mode 100644 index 00000000000..beb4510a128 --- /dev/null +++ b/Lib/test/test_free_threading/test_csv.py @@ -0,0 +1,50 @@ +import csv +import io +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestCSV(unittest.TestCase): + def test_concurrent_reader_next(self): + input_rows = [f"{i},{i},{i}" for i in range(50)] + input_stream = io.StringIO("\n".join(input_rows)) + reader = csv.reader(input_stream) + output_rows = [] + + def read_row(): + for row in reader: + self.assertEqual(len(row), 3) + output_rows.append(",".join(row)) + + run_concurrently(worker_func=read_row, nthreads=NTHREADS) + self.assertSetEqual(set(input_rows), set(output_rows)) + + def test_concurrent_writer_writerow(self): + output_stream = io.StringIO() + writer = csv.writer(output_stream) + row_per_thread = 10 + expected_rows = [] + + def write_row(): + for i in range(row_per_thread): + writer.writerow([i, i, i]) + expected_rows.append(f"{i},{i},{i}") + + run_concurrently(worker_func=write_row, nthreads=NTHREADS) + + # Rewind to the start of the stream and parse the rows + output_stream.seek(0) + output_rows = [line.strip() for line in output_stream.readlines()] + + self.assertEqual(len(output_rows), NTHREADS * row_per_thread) + self.assertListEqual(sorted(output_rows), sorted(expected_rows)) + + +if __name__ 
== "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index 407bf7cbdee..2cd6e7b035e 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -35,10 +35,10 @@ def work(self, n, funcs): return n return self.work(n - 1, funcs) + self.work(n - 2, funcs) - def start_work(self, n, funcs): + def start_work(self, n, funcs, barrier): # With the GIL builds we need to make sure that the hooks have # a chance to run as it's possible to run w/o releasing the GIL. - time.sleep(0.1) + barrier.wait() self.work(n, funcs) def after_test(self): @@ -53,14 +53,16 @@ def test_instrumentation(self): exec("def f(): pass", x) funcs.append(x["f"]) + barrier = Barrier(self.thread_count + 1) threads = [] for i in range(self.thread_count): # Each thread gets a copy of the func list to avoid contention - t = Thread(target=self.start_work, args=(self.fib, list(funcs))) + t = Thread(target=self.start_work, args=(self.fib, list(funcs), barrier)) t.start() threads.append(t) self.after_threads() + barrier.wait() while True: any_alive = False @@ -73,6 +75,9 @@ def test_instrumentation(self): break self.during_threads() + # Sleep to avoid setting monitoring events too rapidly and + # overflowing the global version counter + time.sleep(0.0001) self.after_test() @@ -117,7 +122,6 @@ class MonitoringMultiThreaded( def setUp(self): super().setUp() self.set = False - self.called = False monitoring.register_callback( self.tool_id, monitoring.events.LINE, self.callback ) @@ -127,10 +131,7 @@ def tearDown(self): super().tearDown() def callback(self, *args): - self.called = True - - def after_test(self): - self.assertTrue(self.called) + pass def during_threads(self): if self.set: @@ -148,16 +149,11 @@ class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): sys.settrace(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): @@ -174,16 +170,11 @@ class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): sys.setprofile(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): @@ -200,16 +191,11 @@ class SetProfileAllThreadsMultiThreaded(InstrumentationMultiThreadedMixin, TestC def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): threading.setprofile_all_threads(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py new file mode 100644 index 00000000000..56f25045d1b --- /dev/null +++ b/Lib/test/test_free_threading/test_re.py @@ -0,0 +1,62 @@ +import re +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestRe(unittest.TestCase): + def test_pattern_sub(self): + """Pattern substitution should work across threads""" + pattern = re.compile(r"\w+@\w+\.\w+") + text = "e-mail: test@python.org or 
user@pycon.org. " * 5 + results = [] + + def worker(): + substituted = pattern.sub("(redacted)", text) + results.append(substituted.count("(redacted)")) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 5] * NTHREADS) + + def test_pattern_search(self): + """Pattern search should work across threads.""" + emails = ["alice@python.org", "bob@pycon.org"] * 10 + pattern = re.compile(r"\w+@\w+\.\w+") + results = [] + + def worker(): + matches = [pattern.search(e).group() for e in emails] + results.append(len(matches)) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 10] * NTHREADS) + + def test_scanner_concurrent_access(self): + """Shared scanner should reject concurrent access.""" + pattern = re.compile(r"\w+") + scanner = pattern.scanner("word " * 10) + + def worker(): + for _ in range(100): + try: + scanner.search() + except ValueError as e: + if "already executing" in str(e): + pass + else: + raise + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + # This test has no assertions. Its purpose is to catch crashes and + # enable thread sanitizer to detect race conditions. While "already + # executing" errors are very likely, they're not guaranteed due to + # non-deterministic thread scheduling, so we can't assert errors > 0. + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 10c3a622107..ec5df4d20e7 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -846,11 +846,15 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "candidates", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["candidates"], 0) + self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): self.addCleanup(gc.enable) @@ -861,11 +865,25 @@ def test_get_stats(self): self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"]) + self.assertGreater(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertEqual(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertGreaterEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertEqual(new[2][stat], old[2][stat]) gc.collect(2) - new = gc.get_stats() - self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) + old, new = new, gc.get_stats() + self.assertEqual(new[0]["collections"], old[0]["collections"]) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"] + 1) + self.assertEqual(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertGreater(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertGreaterEqual(new[2][stat], old[2][stat]) def test_freeze(self): 
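+        # Note: freeze() moves every object currently tracked by the
+        # collector into a permanent generation that future collections
+        # ignore.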
gc.freeze() @@ -1298,9 +1316,11 @@ def test_collect(self): # Check that we got the right info dict for all callbacks for v in self.visit: info = v[2] - self.assertTrue("generation" in info) - self.assertTrue("collected" in info) - self.assertTrue("uncollectable" in info) + self.assertIn("generation", info) + self.assertIn("collected", info) + self.assertIn("uncollectable", info) + self.assertIn("candidates", info) + self.assertIn("duration", info) def test_collect_generation(self): self.preclean() diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 09ce329bdcd..de0dbab480f 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -165,7 +165,7 @@ def test_inst_one_pop(self): value = stack_pointer[-1]; SPAM(value); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -190,7 +190,7 @@ def test_inst_one_push(self): res = SPAM(); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -247,7 +247,7 @@ def test_binary_op(self): res = SPAM(left, right); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -366,14 +366,14 @@ def test_sync_sp(self): _PyStackRef res; arg = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); escaping_call(); stack_pointer = _PyFrame_GetStackPointer(frame); res = Py_None; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -489,7 +489,7 @@ def test_error_if_pop(self): res = 0; stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -523,7 +523,7 @@ def test_error_if_pop_with_result(self): } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -553,7 +553,7 @@ def test_cache_effect(self): uint32_t extra = read_u32(&this_instr[2].cache); (void)extra; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -640,7 +640,7 @@ def test_macro_instruction(self): } stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -688,7 +688,7 @@ def test_macro_instruction(self): stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -827,7 +827,7 @@ def test_array_input(self): below = stack_pointer[-2 - oparg*2]; SPAM(values, oparg); stack_pointer += -2 - oparg*2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -860,7 +860,7 @@ def test_array_output(self): stack_pointer[-2] = below; stack_pointer[-1 + oparg*3] = above; stack_pointer += oparg*3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -889,7 +889,7 @@ def test_array_input_output(self): above = 0; stack_pointer[0] = above; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -918,11 +918,11 @@ def test_array_error_if(self): extra = stack_pointer[-1 - oparg]; if (oparg == 0) { stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -960,7 +960,7 @@ def test_macro_push_push(self): stack_pointer[0] = val1; stack_pointer[1] = val2; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1263,13 +1263,13 @@ def test_flush(self): stack_pointer[0] = a; stack_pointer[1] = b; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); // SECOND { USE(a, b); } stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1325,7 +1325,7 @@ def test_pop_on_error_peeks(self): } } stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1368,14 +1368,14 @@ def test_push_then_error(self): stack_pointer[0] = a; stack_pointer[1] = b; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } } stack_pointer[0] = a; stack_pointer[1] = b; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1661,7 +1661,7 @@ def test_pystackref_frompyobject_new_next_to_cmacro(self): stack_pointer[0] = out1; stack_pointer[1] = out2; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1881,7 +1881,7 @@ def test_reassigning_dead_inputs(self): stack_pointer = _PyFrame_GetStackPointer(frame); in = temp; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(in); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2115,8 +2115,9 @@ def test_validate_uop_unused_input(self): """ output = """ case OP: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ @@ -2132,8 +2133,9 @@ def test_validate_uop_unused_input(self): """ output = """ case OP: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ @@ -2153,9 +2155,10 @@ def test_validate_uop_unused_output(self): case OP: { JitOptRef foo; foo = NULL; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = foo; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ @@ -2172,8 +2175,9 @@ def test_validate_uop_unused_output(self): """ output = """ case OP: { + CHECK_STACK_BOUNDS(1); stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 33845d8a9e2..489bb049d2f 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -40,12 +40,15 @@ openssl_hashlib = import_fresh_module('hashlib', fresh=['_hashlib']) try: - from _hashlib import HASH, HASHXOF, openssl_md_meth_names, get_fips_mode + import _hashlib except ImportError: - HASH = None - HASHXOF = None - 
openssl_md_meth_names = frozenset() - + _hashlib = None +# The extension module may exist but only define some of these. gh-141907 +HASH = getattr(_hashlib, 'HASH', None) +HASHXOF = getattr(_hashlib, 'HASHXOF', None) +openssl_md_meth_names = getattr(_hashlib, 'openssl_md_meth_names', frozenset()) +get_fips_mode = getattr(_hashlib, 'get_fips_mode', None) +if not get_fips_mode: def get_fips_mode(): return 0 @@ -631,9 +634,14 @@ def check_sha3(self, name, capacity, rate, suffix): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: m = hash_object_constructor() - if HASH is not None and isinstance(m, HASH): - # _hashopenssl's variant does not have extra SHA3 attributes - continue + if name.startswith('shake_'): + if HASHXOF is not None and isinstance(m, HASHXOF): + # _hashopenssl's variant does not have extra SHA3 attributes + continue + else: + if HASH is not None and isinstance(m, HASH): + # _hashopenssl's variant does not have extra SHA3 attributes + continue self.assertEqual(capacity + rate, 1600) self.assertEqual(m._capacity_bits, capacity) self.assertEqual(m._rate_bits, rate) @@ -1156,7 +1164,8 @@ def test_disallow_instantiation(self): def test_hash_disallow_instantiation(self): # internal types like _hashlib.HASH are not constructable support.check_disallow_instantiation(self, HASH) - support.check_disallow_instantiation(self, HASHXOF) + if HASHXOF is not None: + support.check_disallow_instantiation(self, HASHXOF) def test_readonly_types(self): for algorithm, constructors in self.constructors_to_test.items(): diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 19dde9362a4..e4eff1ea17a 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -109,12 +109,13 @@ def get_events(self): class TestCaseBase(unittest.TestCase): - def get_collector(self): - return EventCollector(convert_charrefs=False) + def get_collector(self, convert_charrefs=False): + return EventCollector(convert_charrefs=convert_charrefs) - def _run_check(self, source, expected_events, collector=None): + def _run_check(self, source, expected_events, + *, collector=None, convert_charrefs=False): if collector is None: - collector = self.get_collector() + collector = self.get_collector(convert_charrefs=convert_charrefs) parser = collector for s in source: parser.feed(s) @@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None): def _run_check_extra(self, source, events): self._run_check(source, events, - EventCollectorExtra(convert_charrefs=False)) + collector=EventCollectorExtra(convert_charrefs=False)) class HTMLParserTestCase(TestCaseBase): @@ -187,10 +188,87 @@ def test_malformatted_charref(self): ]) def test_unclosed_entityref(self): - self._run_check("&entityref foo", [ - ("entityref", "entityref"), - ("data", " foo"), - ]) + self._run_check('> <', [('entityref', 'gt'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('> <', [('data', '> <')], convert_charrefs=True) + + self._run_check('&undefined <', + [('entityref', 'undefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('&undefined <', [('data', '&undefined <')], + convert_charrefs=True) + + self._run_check('>undefined <', + [('entityref', 'gtundefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('>undefined <', [('data', '>undefined <')], + convert_charrefs=True) + + self._run_check('& <', [('data', '& '), ('entityref', 'lt')], + 
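The gh-141907 fix above replaces the all-or-nothing import with per-name getattr fallbacks, so a partially built _hashlib no longer prevents the whole suite from loading. A condensed sketch of the pattern and of how a test can then guard on a single missing name (the TestHash class is illustrative, not part of the patch):

import unittest

try:
    import _hashlib
except ImportError:
    _hashlib = None

# Each attribute degrades independently instead of one missing name
# discarding the whole module.
HASH = getattr(_hashlib, "HASH", None)
get_fips_mode = getattr(_hashlib, "get_fips_mode", None) or (lambda: 0)

@unittest.skipUnless(HASH is not None, "_hashlib.HASH not built")
class TestHash(unittest.TestCase):
    def test_not_constructable(self):
        self.assertRaises(TypeError, HASH)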
convert_charrefs=False) + self._run_check('& <', [('data', '& <')], convert_charrefs=True) + + def test_eof_in_entityref(self): + self._run_check('>', [('entityref', 'gt')], convert_charrefs=False) + self._run_check('>', [('data', '>')], convert_charrefs=True) + + self._run_check('&g', [('entityref', 'g')], convert_charrefs=False) + self._run_check('&g', [('data', '&g')], convert_charrefs=True) + + self._run_check('&undefined', [('entityref', 'undefined')], + convert_charrefs=False) + self._run_check('&undefined', [('data', '&undefined')], + convert_charrefs=True) + + self._run_check('>undefined', [('entityref', 'gtundefined')], + convert_charrefs=False) + self._run_check('>undefined', [('data', '>undefined')], + convert_charrefs=True) + + self._run_check('&', [('data', '&')], convert_charrefs=False) + self._run_check('&', [('data', '&')], convert_charrefs=True) + + def test_unclosed_charref(self): + self._run_check('{ <', [('charref', '123'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('{ <', [('data', '{ <')], convert_charrefs=True) + self._run_check('« <', [('charref', 'xab'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('« <', [('data', '\xab <')], convert_charrefs=True) + + self._run_check('� <', + [('charref', '123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + self._run_check('� <', + [('charref', 'x123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + + self._run_check('&# <', [('data', '&# '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&# <', [('data', '&# <')], convert_charrefs=True) + self._run_check('&#x <', [('data', '&#x '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&#x <', [('data', '&#x <')], convert_charrefs=True) + + def test_eof_in_charref(self): + self._run_check('{', [('charref', '123')], convert_charrefs=False) + self._run_check('{', [('data', '{')], convert_charrefs=True) + self._run_check('«', [('charref', 'xab')], convert_charrefs=False) + self._run_check('«', [('data', '\xab')], convert_charrefs=True) + + self._run_check('�', [('charref', '123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + self._run_check('�', [('charref', 'x123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + + self._run_check('&#', [('data', '&#')], convert_charrefs=False) + self._run_check('&#', [('data', '&#')], convert_charrefs=True) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=False) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=True) def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping @@ -762,20 +840,6 @@ def test_correct_detection_of_start_tags(self): ] self._run_check(html, expected) - def test_EOF_in_charref(self): - # see #17802 - # This test checks that the UnboundLocalError reported in the issue - # is not raised, however I'm not sure the returned values are correct. 
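The new cases above encode one consistent contrast: with convert_charrefs=False the tokenizer reports entityref/charref events even for unterminated references, while with convert_charrefs=True recognized references are folded into the surrounding data. A standalone illustration — the event stream shown is the one these tests establish, so unpatched parsers may differ on unterminated input:

from html.parser import HTMLParser

class Events(HTMLParser):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.events = []

    def handle_data(self, data):
        self.events.append(("data", data))

    def handle_entityref(self, name):
        self.events.append(("entityref", name))

    def handle_charref(self, name):
        self.events.append(("charref", name))

for convert in (False, True):
    parser = Events(convert_charrefs=convert)
    parser.feed("&gt; &lt")   # both references are unterminated
    parser.close()
    print(convert, parser.events)
# per the tests: False -> entityref events for 'gt' and 'lt';
#                True  -> [('data', '> <')]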
- # Maybe HTMLParser should use self.unescape for these - data = [ - ('a&', [('data', 'a&')]), - ('a&b', [('data', 'ab')]), - ('a&b ', [('data', 'a'), ('entityref', 'b'), ('data', ' ')]), - ('a&b;', [('data', 'a'), ('entityref', 'b')]), - ] - for html, expected in data: - self._run_check(html, expected) - def test_eof_in_comments(self): data = [ (' B (id=2) + + Neither task is a leaf (both have parents), so no stacks are produced. + """ + collector = PstatsCollector(sample_interval_usec=1000) + task_a = MockTaskInfo( + task_id=1, + task_name="A", + coroutine_stack=[MockCoroInfo(task_name="A", call_stack=[MockFrameInfo("a.py", 1, "a")])], + awaited_by=[MockCoroInfo(task_name=2, call_stack=[MockFrameInfo("b.py", 5, "b")])] + ) + + task_b = MockTaskInfo( + task_id=2, + task_name="B", + coroutine_stack=[MockCoroInfo(task_name="B", call_stack=[MockFrameInfo("b.py", 10, "b")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("a.py", 15, "a")])] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=400, awaited_by=[task_a, task_b])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # No leaves (both have parents), should return empty + self.assertEqual(len(stacks), 0) + + def test_orphaned_parent_reference(self): + """Test _build_linear_stacks: handles parent ID not in task_map.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Task references non-existent parent + orphan = MockTaskInfo( + task_id=5, + task_name="Orphan", + coroutine_stack=[MockCoroInfo(task_name="Orphan", call_stack=[MockFrameInfo("o.py", 1, "orphan")])], + awaited_by=[MockCoroInfo(task_name=999, call_stack=[])] # 999 doesn't exist + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=500, awaited_by=[orphan])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Stops at missing parent, yields what it has + self.assertEqual(len(stacks), 1) + frames, _, leaf_id = stacks[0] + self.assertEqual(leaf_id, 5) + + def test_multiple_coroutines_per_task(self): + """Test _build_linear_stacks: collects frames from all coroutines in task.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Task with multiple coroutines (e.g., nested async generators) + task = MockTaskInfo( + task_id=7, + task_name="Multi", + coroutine_stack=[ + MockCoroInfo(task_name="Multi", call_stack=[MockFrameInfo("g.py", 5, "gen1")]), + MockCoroInfo(task_name="Multi", call_stack=[MockFrameInfo("g.py", 10, "gen2")]), + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=600, awaited_by=[task])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + self.assertEqual(len(stacks), 1) + frames, _, _ = stacks[0] + + # Both coroutine frames should be present + func_names = [f.funcname for f in frames] + self.assertIn("gen1", func_names) + self.assertIn("gen2", func_names) + + def test_multiple_threads(self): + """Test _build_task_graph: handles multiple AwaitedInfo (different threads).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Two threads with separate task trees + thread1_task = MockTaskInfo( + task_id=100, + task_name="T1", + coroutine_stack=[MockCoroInfo(task_name="T1", call_stack=[MockFrameInfo("t1.py", 1, "t1")])], + awaited_by=[] + ) + + thread2_task = MockTaskInfo( + task_id=200, + task_name="T2", + coroutine_stack=[MockCoroInfo(task_name="T2", call_stack=[MockFrameInfo("t2.py", 1, "t2")])], + awaited_by=[] + ) + + awaited_info_list = [ + MockAwaitedInfo(thread_id=1, awaited_by=[thread1_task]), + 
MockAwaitedInfo(thread_id=2, awaited_by=[thread2_task]), + ] + + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Two threads = two stacks + self.assertEqual(len(stacks), 2) + + # Verify thread IDs preserved + thread_ids = {thread_id for _, thread_id, _ in stacks} + self.assertEqual(thread_ids, {1, 2}) + + def test_collect_public_interface(self): + """Test collect() method correctly routes to async frame processing.""" + collector = PstatsCollector(sample_interval_usec=1000) + + child = MockTaskInfo( + task_id=50, + task_name="Child", + coroutine_stack=[MockCoroInfo(task_name="Child", call_stack=[MockFrameInfo("c.py", 1, "child")])], + awaited_by=[MockCoroInfo(task_name=51, call_stack=[])] + ) + + parent = MockTaskInfo( + task_id=51, + task_name="Parent", + coroutine_stack=[MockCoroInfo(task_name="Parent", call_stack=[MockFrameInfo("p.py", 1, "parent")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=999, awaited_by=[child, parent])] + + # Public interface: collect() + collector.collect(awaited_info_list) + + # Verify stats collected + self.assertGreater(len(collector.result), 0) + func_names = [loc[2] for loc in collector.result.keys()] + self.assertIn("child", func_names) + self.assertIn("parent", func_names) + + def test_diamond_pattern_multiple_parents(self): + """Test _build_linear_stacks: task with 2+ parents picks one deterministically. + + CRITICAL: Tests that when a task has multiple parents, we pick one parent + deterministically (sorted, first one) and annotate the task name with parent count. + """ + collector = PstatsCollector(sample_interval_usec=1000) + + # Diamond pattern: Root spawns A and B, both await Child + # + # Root (id=1) + # / \ + # A (id=2) B (id=3) + # \ / + # Child (id=4) + # + + child = MockTaskInfo( + task_id=4, + task_name="Child", + coroutine_stack=[MockCoroInfo(task_name="Child", call_stack=[MockFrameInfo("c.py", 1, "child_work")])], + awaited_by=[ + MockCoroInfo(task_name=2, call_stack=[MockFrameInfo("a.py", 5, "a_await")]), # Parent A + MockCoroInfo(task_name=3, call_stack=[MockFrameInfo("b.py", 5, "b_await")]), # Parent B + ] + ) + + parent_a = MockTaskInfo( + task_id=2, + task_name="A", + coroutine_stack=[MockCoroInfo(task_name="A", call_stack=[MockFrameInfo("a.py", 10, "a_work")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("root.py", 5, "root_spawn")])] + ) + + parent_b = MockTaskInfo( + task_id=3, + task_name="B", + coroutine_stack=[MockCoroInfo(task_name="B", call_stack=[MockFrameInfo("b.py", 10, "b_work")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[MockFrameInfo("root.py", 5, "root_spawn")])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[MockCoroInfo(task_name="Root", call_stack=[MockFrameInfo("root.py", 20, "main")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=777, awaited_by=[child, parent_a, parent_b, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should get 1 stack: Child->A->Root (picks parent with lowest ID: 2) + self.assertEqual(len(stacks), 1, "Diamond should create only 1 path, picking first sorted parent") + + # Verify the single stack + frames, thread_id, leaf_id = stacks[0] + self.assertEqual(leaf_id, 4) + self.assertEqual(thread_id, 777) + + func_names = [f.funcname for f in frames] + # Stack should contain child, parent A (id=2, first when sorted), and root + self.assertIn("child_work", func_names) + self.assertIn("a_work", func_names, "Should use parent A 
(id=2, first when sorted)") + self.assertNotIn("b_work", func_names, "Should not include parent B") + self.assertIn("main", func_names) + + # Verify Child task is annotated with parent count + self.assertIn("Child (2 parents)", func_names, "Child task should be annotated with parent count") + + def test_empty_coroutine_stack(self): + """Test _build_linear_stacks: handles empty coroutine_stack (line 109 condition false).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Task with no coroutine_stack + task = MockTaskInfo( + task_id=99, + task_name="EmptyStack", + coroutine_stack=[], # Empty! + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=111, awaited_by=[task])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + self.assertEqual(len(stacks), 1) + frames, _, _ = stacks[0] + + # Should only have task marker, no function frames + func_names = [f.funcname for f in frames] + self.assertEqual(len(func_names), 1, "Should only have task marker") + self.assertIn("EmptyStack", func_names) + + def test_orphaned_parent_with_no_frames_collected(self): + """Test _build_linear_stacks: orphaned parent at start with empty frames (line 94-96).""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Leaf that doesn't exist in task_map (should not happen normally, but test robustness) + # We'll create a scenario where the leaf_id is present but empty + + # Task references non-existent parent, and has no coroutine_stack + orphan = MockTaskInfo( + task_id=88, + task_name="Orphan", + coroutine_stack=[], # No frames + awaited_by=[MockCoroInfo(task_name=999, call_stack=[])] # Parent doesn't exist + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=222, awaited_by=[orphan])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # Should yield because we have the task marker even with no function frames + self.assertEqual(len(stacks), 1) + frames, _, leaf_id = stacks[0] + self.assertEqual(leaf_id, 88) + # Has task marker but no function frames + self.assertGreater(len(frames), 0, "Should have at least task marker") + + def test_frame_ordering(self): + """Test _build_linear_stacks: frames are collected in correct order (leaf->root). + + Task graph (3-level chain): + + Root (id=1) <- root_bottom, root_top + | + Middle (id=2) <- mid_bottom, mid_top + | + Leaf (id=3) <- leaf_bottom, leaf_top + + Expected frame order: leaf_bottom, leaf_top, mid_bottom, mid_top, root_bottom, root_top + (stack is built bottom-up: leaf frames first, then parent frames). 
+ """ + collector = PstatsCollector(sample_interval_usec=1000) + leaf = MockTaskInfo( + task_id=3, + task_name="Leaf", + coroutine_stack=[ + MockCoroInfo(task_name="Leaf", call_stack=[ + MockFrameInfo("leaf.py", 1, "leaf_bottom"), + MockFrameInfo("leaf.py", 2, "leaf_top"), + ]) + ], + awaited_by=[MockCoroInfo(task_name=2, call_stack=[])] + ) + + middle = MockTaskInfo( + task_id=2, + task_name="Middle", + coroutine_stack=[ + MockCoroInfo(task_name="Middle", call_stack=[ + MockFrameInfo("mid.py", 1, "mid_bottom"), + MockFrameInfo("mid.py", 2, "mid_top"), + ]) + ], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[ + MockCoroInfo(task_name="Root", call_stack=[ + MockFrameInfo("root.py", 1, "root_bottom"), + MockFrameInfo("root.py", 2, "root_top"), + ]) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=333, awaited_by=[leaf, middle, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + self.assertEqual(len(stacks), 1) + frames, _, _ = stacks[0] + + func_names = [f.funcname for f in frames] + + # Order should be: leaf frames, leaf marker, middle frames, middle marker, root frames, root marker + leaf_bottom_idx = func_names.index("leaf_bottom") + leaf_top_idx = func_names.index("leaf_top") + mid_bottom_idx = func_names.index("mid_bottom") + root_bottom_idx = func_names.index("root_bottom") + + # Verify leaf comes before middle comes before root + self.assertLess(leaf_bottom_idx, leaf_top_idx, "Leaf frames in order") + self.assertLess(leaf_top_idx, mid_bottom_idx, "Leaf before middle") + self.assertLess(mid_bottom_idx, root_bottom_idx, "Middle before root") + + def test_complex_multi_parent_convergence(self): + """Test _build_linear_stacks: multiple leaves with same parents pick deterministically. + + Tests that when multiple leaves have multiple parents, each leaf picks the same + parent (sorted, first one) and all leaves are annotated with parent count. + + Task graph structure (both leaves awaited by both A and B):: + + Root (id=1) + / \\ + A (id=2) B (id=3) + | \\ / | + | \\ / | + | \\/ | + | /\\ | + | / \\ | + LeafX (id=4) LeafY (id=5) + + Expected behavior: Both leaves pick parent A (lowest id=2) for their stack path. + Result: 2 stacks, both going through A -> Root (B is skipped). 
+ """ + collector = PstatsCollector(sample_interval_usec=1000) + + leaf_x = MockTaskInfo( + task_id=4, + task_name="LeafX", + coroutine_stack=[MockCoroInfo(task_name="LeafX", call_stack=[MockFrameInfo("x.py", 1, "x")])], + awaited_by=[ + MockCoroInfo(task_name=2, call_stack=[]), + MockCoroInfo(task_name=3, call_stack=[]), + ] + ) + + leaf_y = MockTaskInfo( + task_id=5, + task_name="LeafY", + coroutine_stack=[MockCoroInfo(task_name="LeafY", call_stack=[MockFrameInfo("y.py", 1, "y")])], + awaited_by=[ + MockCoroInfo(task_name=2, call_stack=[]), + MockCoroInfo(task_name=3, call_stack=[]), + ] + ) + + parent_a = MockTaskInfo( + task_id=2, + task_name="A", + coroutine_stack=[MockCoroInfo(task_name="A", call_stack=[MockFrameInfo("a.py", 1, "a")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + parent_b = MockTaskInfo( + task_id=3, + task_name="B", + coroutine_stack=[MockCoroInfo(task_name="B", call_stack=[MockFrameInfo("b.py", 1, "b")])], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="Root", + coroutine_stack=[MockCoroInfo(task_name="Root", call_stack=[MockFrameInfo("r.py", 1, "root")])], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=444, awaited_by=[leaf_x, leaf_y, parent_a, parent_b, root])] + stacks = list(collector._iter_async_frames(awaited_info_list)) + + # 2 leaves, each picks same parent (A, id=2) = 2 paths + self.assertEqual(len(stacks), 2, "Should create 2 paths: X->A->Root, Y->A->Root") + + # Verify both leaves pick parent A (id=2, first when sorted) + leaf_ids_seen = set() + for frames, _, leaf_id in stacks: + leaf_ids_seen.add(leaf_id) + func_names = [f.funcname for f in frames] + + # Both stacks should go through parent A only + self.assertIn("a", func_names, "Should use parent A (id=2, first when sorted)") + self.assertNotIn("b", func_names, "Should not include parent B") + self.assertIn("root", func_names, "Should reach root") + + # Check for parent count annotation on the leaf + if leaf_id == 4: + self.assertIn("x", func_names) + self.assertIn("LeafX (2 parents)", func_names, "LeafX should be annotated with parent count") + elif leaf_id == 5: + self.assertIn("y", func_names) + self.assertIn("LeafY (2 parents)", func_names, "LeafY should be annotated with parent count") + + # Both leaves should be represented + self.assertEqual(leaf_ids_seen, {4, 5}, "Both LeafX and LeafY should have paths") + + +class TestFlamegraphCollectorAsync(unittest.TestCase): + """Test FlamegraphCollector with async frames.""" + + def test_flamegraph_with_async_frames(self): + """Test FlamegraphCollector correctly processes async task frames.""" + from profiling.sampling.stack_collector import FlamegraphCollector + + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Build async task tree: Root -> Child + child = MockTaskInfo( + task_id=2, + task_name="ChildTask", + coroutine_stack=[ + MockCoroInfo( + task_name="ChildTask", + call_stack=[MockFrameInfo("child.py", 10, "child_work")] + ) + ], + awaited_by=[MockCoroInfo(task_name=1, call_stack=[])] + ) + + root = MockTaskInfo( + task_id=1, + task_name="RootTask", + coroutine_stack=[ + MockCoroInfo( + task_name="RootTask", + call_stack=[MockFrameInfo("root.py", 20, "root_work")] + ) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[child, root])] + + # Collect async frames + collector.collect(awaited_info_list) + + # Verify samples were collected + self.assertGreater(collector._total_samples, 0) + 
+ # Verify the flamegraph tree structure contains our functions + root_node = collector._root + self.assertGreater(root_node["samples"], 0) + + # Check that thread ID was tracked + self.assertIn(100, collector._all_threads) + + def test_flamegraph_with_task_markers(self): + """Test FlamegraphCollector includes boundary markers.""" + from profiling.sampling.stack_collector import FlamegraphCollector + + collector = FlamegraphCollector(sample_interval_usec=1000) + + task = MockTaskInfo( + task_id=42, + task_name="MyTask", + coroutine_stack=[ + MockCoroInfo( + task_name="MyTask", + call_stack=[MockFrameInfo("work.py", 5, "do_work")] + ) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=200, awaited_by=[task])] + collector.collect(awaited_info_list) + + # Find marker in the tree + def find_task_marker(node, depth=0): + for func, child in node.get("children", {}).items(): + if func[0] == "": + return func + result = find_task_marker(child, depth + 1) + if result: + return result + return None + + task_marker = find_task_marker(collector._root) + self.assertIsNotNone(task_marker, "Should have marker in tree") + self.assertEqual(task_marker[0], "") + self.assertIn("MyTask", task_marker[2]) + + def test_flamegraph_multiple_async_samples(self): + """Test FlamegraphCollector aggregates multiple async samples correctly.""" + from profiling.sampling.stack_collector import FlamegraphCollector + + collector = FlamegraphCollector(sample_interval_usec=1000) + + task = MockTaskInfo( + task_id=1, + task_name="Task", + coroutine_stack=[ + MockCoroInfo( + task_name="Task", + call_stack=[MockFrameInfo("work.py", 10, "work")] + ) + ], + awaited_by=[] + ) + + awaited_info_list = [MockAwaitedInfo(thread_id=300, awaited_by=[task])] + + # Collect multiple samples + for _ in range(5): + collector.collect(awaited_info_list) + + # Verify sample count + self.assertEqual(collector._sample_count, 5) + self.assertEqual(collector._total_samples, 5) + + +class TestAsyncAwareParameterFlow(unittest.TestCase): + """Integration tests for async_aware parameter flow from CLI to unwinder.""" + + def test_sample_function_accepts_async_aware(self): + """Test that sample() function accepts async_aware parameter.""" + from profiling.sampling.sample import sample + import inspect + + sig = inspect.signature(sample) + self.assertIn("async_aware", sig.parameters) + + def test_sample_live_function_accepts_async_aware(self): + """Test that sample_live() function accepts async_aware parameter.""" + from profiling.sampling.sample import sample_live + import inspect + + sig = inspect.signature(sample_live) + self.assertIn("async_aware", sig.parameters) + + def test_sample_profiler_sample_accepts_async_aware(self): + """Test that SampleProfiler.sample() accepts async_aware parameter.""" + from profiling.sampling.sample import SampleProfiler + import inspect + + sig = inspect.signature(SampleProfiler.sample) + self.assertIn("async_aware", sig.parameters) + + def test_async_aware_all_sees_sleeping_and_running_tasks(self): + """Test async_aware='all' captures both sleeping and CPU-running tasks.""" + # Sleeping task (awaiting) + sleeping_task = MockTaskInfo( + task_id=1, + task_name="SleepingTask", + coroutine_stack=[ + MockCoroInfo( + task_name="SleepingTask", + call_stack=[MockFrameInfo("sleeper.py", 10, "sleep_work")] + ) + ], + awaited_by=[] + ) + + # CPU-running task (active) + running_task = MockTaskInfo( + task_id=2, + task_name="RunningTask", + coroutine_stack=[ + MockCoroInfo( + task_name="RunningTask", + 
call_stack=[MockFrameInfo("runner.py", 20, "cpu_work")] + ) + ], + awaited_by=[] + ) + + # Both tasks returned by get_all_awaited_by + awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[sleeping_task, running_task])] + + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect(awaited_info_list) + collector.create_stats() + + # Both tasks should be visible + sleeping_key = ("sleeper.py", 10, "sleep_work") + running_key = ("runner.py", 20, "cpu_work") + + self.assertIn(sleeping_key, collector.stats) + self.assertIn(running_key, collector.stats) + + # Task markers should also be present + task_keys = [k for k in collector.stats if k[0] == ""] + self.assertGreater(len(task_keys), 0, "Should have markers in stats") + + # Verify task names are in the markers + task_names = [k[2] for k in task_keys] + self.assertTrue( + any("SleepingTask" in name for name in task_names), + "SleepingTask should be in task markers" + ) + self.assertTrue( + any("RunningTask" in name for name in task_names), + "RunningTask should be in task markers" + ) + + def test_async_aware_running_sees_only_running_task(self): + """Test async_aware='running' only shows the currently running task stack.""" + # Only the running task's stack is returned by get_async_stack_trace + running_task = MockTaskInfo( + task_id=2, + task_name="RunningTask", + coroutine_stack=[ + MockCoroInfo( + task_name="RunningTask", + call_stack=[MockFrameInfo("runner.py", 20, "cpu_work")] + ) + ], + awaited_by=[] + ) + + # get_async_stack_trace only returns the running task + awaited_info_list = [MockAwaitedInfo(thread_id=100, awaited_by=[running_task])] + + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect(awaited_info_list) + collector.create_stats() + + # Only running task should be visible + running_key = ("runner.py", 20, "cpu_work") + self.assertIn(running_key, collector.stats) + + # Verify we don't see the sleeping task (it wasn't in the input) + sleeping_key = ("sleeper.py", 10, "sleep_work") + self.assertNotIn(sleeping_key, collector.stats) + + # Task marker for running task should be present + task_keys = [k for k in collector.stats if k[0] == ""] + self.assertGreater(len(task_keys), 0, "Should have markers in stats") + + task_names = [k[2] for k in task_keys] + self.assertTrue( + any("RunningTask" in name for name in task_names), + "RunningTask should be in task markers" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_cli.py b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py new file mode 100644 index 00000000000..e1892ec9155 --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_cli.py @@ -0,0 +1,711 @@ +"""Tests for sampling profiler CLI argument parsing and functionality.""" + +import io +import subprocess +import sys +import unittest +from unittest import mock + +try: + import _remote_debugging # noqa: F401 +except ImportError: + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) + +from test.support import is_emscripten + + +class TestSampleProfilerCLI(unittest.TestCase): + def _setup_sync_mocks(self, mock_socket, mock_popen): + """Helper to set up socket and process mocks for coordinator tests.""" + # Mock the sync socket with context manager support + mock_sock_instance = mock.MagicMock() + mock_sock_instance.getsockname.return_value = ("127.0.0.1", 12345) + + # Mock the connection with context manager support + mock_conn = mock.MagicMock() + 
mock_conn.recv.return_value = b"ready" + mock_conn.__enter__.return_value = mock_conn + mock_conn.__exit__.return_value = None + + # Mock accept() to return (connection, address) and support indexing + mock_accept_result = mock.MagicMock() + mock_accept_result.__getitem__.return_value = ( + mock_conn # [0] returns the connection + ) + mock_sock_instance.accept.return_value = mock_accept_result + + # Mock socket with context manager support + mock_sock_instance.__enter__.return_value = mock_sock_instance + mock_sock_instance.__exit__.return_value = None + mock_socket.return_value = mock_sock_instance + + # Mock the subprocess + mock_process = mock.MagicMock() + mock_process.pid = 12345 + mock_process.poll.return_value = None + mock_popen.return_value = mock_process + return mock_process + + def _verify_coordinator_command(self, mock_popen, expected_target_args): + """Helper to verify the coordinator command was called correctly.""" + args, kwargs = mock_popen.call_args + coordinator_cmd = args[0] + self.assertEqual(coordinator_cmd[0], sys.executable) + self.assertEqual(coordinator_cmd[1], "-m") + self.assertEqual( + coordinator_cmd[2], "profiling.sampling._sync_coordinator" + ) + self.assertEqual(coordinator_cmd[3], "12345") # port + # cwd is coordinator_cmd[4] + self.assertEqual(coordinator_cmd[5:], expected_target_args) + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_module_argument_parsing(self): + test_args = ["profiling.sampling.cli", "run", "-m", "mymodule"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + from profiling.sampling.cli import main + self._setup_sync_mocks(mock_socket, mock_popen) + main() + + self._verify_coordinator_command(mock_popen, ("-m", "mymodule")) + # Verify sample was called once (exact arguments will vary with the new API) + mock_sample.assert_called_once() + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_module_with_arguments(self): + test_args = [ + "profiling.sampling.cli", + "run", + "-m", + "mymodule", + "arg1", + "arg2", + "--flag", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + self._setup_sync_mocks(mock_socket, mock_popen) + from profiling.sampling.cli import main + main() + + self._verify_coordinator_command( + mock_popen, ("-m", "mymodule", "arg1", "arg2", "--flag") + ) + mock_sample.assert_called_once() + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_script_argument_parsing(self): + test_args = ["profiling.sampling.cli", "run", "myscript.py"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + self._setup_sync_mocks(mock_socket, mock_popen) + from profiling.sampling.cli import main + main() + + self._verify_coordinator_command(mock_popen, ("myscript.py",)) + mock_sample.assert_called_once() + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_script_with_arguments(self): + test_args = [ + "profiling.sampling.cli", + "run", + "myscript.py", + "arg1", + "arg2", + "--flag", + ] + + with ( + mock.patch("sys.argv", 
test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + # Use the helper to set up mocks consistently + mock_process = self._setup_sync_mocks(mock_socket, mock_popen) + # Override specific behavior for this test + mock_process.wait.side_effect = [ + subprocess.TimeoutExpired(test_args, 0.1), + None, + ] + + from profiling.sampling.cli import main + main() + + # Verify the coordinator command was called + args, kwargs = mock_popen.call_args + coordinator_cmd = args[0] + self.assertEqual(coordinator_cmd[0], sys.executable) + self.assertEqual(coordinator_cmd[1], "-m") + self.assertEqual( + coordinator_cmd[2], "profiling.sampling._sync_coordinator" + ) + self.assertEqual(coordinator_cmd[3], "12345") # port + # cwd is coordinator_cmd[4] + self.assertEqual( + coordinator_cmd[5:], ("myscript.py", "arg1", "arg2", "--flag") + ) + + def test_cli_mutually_exclusive_pid_module(self): + # In new CLI, attach and run are separate subcommands, so this test + # verifies that mixing them causes an error + test_args = [ + "profiling.sampling.cli", + "attach", # attach subcommand uses PID + "12345", + "-m", # -m is only for run subcommand + "mymodule", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("unrecognized arguments", error_msg) + + def test_cli_mutually_exclusive_pid_script(self): + # In new CLI, you can't mix attach (PID) with run (script) + # This would be caught by providing a PID to run subcommand + test_args = ["profiling.sampling.cli", "run", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + self.assertRaises(FileNotFoundError) as cm, # Expect FileNotFoundError, not SystemExit + ): + self._setup_sync_mocks(mock_socket, mock_popen) + # Override to raise FileNotFoundError for non-existent script + mock_popen.side_effect = FileNotFoundError("12345") + from profiling.sampling.cli import main + main() + + # Verify the error is about the non-existent script + self.assertIn("12345", str(cm.exception)) + + def test_cli_no_target_specified(self): + # In new CLI, must specify a subcommand + test_args = ["profiling.sampling.cli", "-d", "5"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("invalid choice", error_msg) + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_module_with_profiler_options(self): + test_args = [ + "profiling.sampling.cli", + "run", + "-i", + "1000", + "-d", + "30", + "-a", + "--sort", + "tottime", + "-l", + "20", + "-m", + "mymodule", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + self._setup_sync_mocks(mock_socket, mock_popen) + from profiling.sampling.cli import main + main() + + 
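The failure-mode tests above and the --async-* checks further down all rely on the same argparse convention: parser.error() prints a message to stderr and exits with code 2. A self-contained sketch of that validation style, with hypothetical option wiring that mirrors the behavior the tests assert:

import argparse

parser = argparse.ArgumentParser(prog="profiler")
sub = parser.add_subparsers(dest="cmd", required=True)
attach = sub.add_parser("attach")
attach.add_argument("pid", type=int)
attach.add_argument("--async-aware", action="store_true")
attach.add_argument("--async-mode", choices=["running", "all"])
attach.add_argument("--native", action="store_true")

args = parser.parse_args(["attach", "12345", "--async-aware"])
if args.async_mode and not args.async_aware:
    parser.error("--async-mode requires --async-aware")        # exit code 2
if args.async_aware and args.native:
    parser.error("--native is incompatible with --async-aware")
async_aware = (args.async_mode or "running") if args.async_aware else None
assert async_aware == "running"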
self._verify_coordinator_command(mock_popen, ("-m", "mymodule")) + mock_sample.assert_called_once() + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_script_with_profiler_options(self): + """Test script with various profiler options.""" + test_args = [ + "profiling.sampling.cli", + "run", + "-i", + "2000", + "-d", + "60", + "--collapsed", + "-o", + "output.txt", + "myscript.py", + "scriptarg", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + self._setup_sync_mocks(mock_socket, mock_popen) + from profiling.sampling.cli import main + main() + + self._verify_coordinator_command( + mock_popen, ("myscript.py", "scriptarg") + ) + # Verify profiler was called + mock_sample.assert_called_once() + + def test_cli_empty_module_name(self): + test_args = ["profiling.sampling.cli", "run", "-m"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("required: target", error_msg) # argparse error for missing positional arg + + @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") + def test_cli_long_module_option(self): + test_args = [ + "profiling.sampling.cli", + "run", + "-m", + "mymodule", + "arg1", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch("subprocess.Popen") as mock_popen, + mock.patch("socket.socket") as mock_socket, + ): + self._setup_sync_mocks(mock_socket, mock_popen) + from profiling.sampling.cli import main + main() + + self._verify_coordinator_command( + mock_popen, ("-m", "mymodule", "arg1") + ) + + def test_cli_complex_script_arguments(self): + test_args = [ + "profiling.sampling.cli", + "run", + "script.py", + "--input", + "file.txt", + "-v", + "--output=/tmp/out", + "positional", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + mock.patch( + "profiling.sampling.cli._run_with_sync" + ) as mock_run_with_sync, + ): + mock_process = mock.MagicMock() + mock_process.pid = 12345 + mock_process.wait.side_effect = [ + subprocess.TimeoutExpired(test_args, 0.1), + None, + ] + mock_process.poll.return_value = None + mock_run_with_sync.return_value = mock_process + + from profiling.sampling.cli import main + main() + + mock_run_with_sync.assert_called_once_with( + ( + sys.executable, + "script.py", + "--input", + "file.txt", + "-v", + "--output=/tmp/out", + "positional", + ), + suppress_output=False + ) + + def test_cli_collapsed_format_validation(self): + """Test that CLI properly validates incompatible options with collapsed format.""" + test_cases = [ + # Test sort option is invalid with collapsed + ( + [ + "profiling.sampling.cli", + "attach", + "12345", + "--collapsed", + "--sort", + "tottime", # Changed from nsamples (default) to trigger validation + ], + "sort", + ), + # Test limit option is invalid with collapsed + ( + [ + "profiling.sampling.cli", + "attach", + "12345", + "--collapsed", + "-l", + "20", + ], + "limit", + ), + # Test no-summary option is invalid with collapsed + ( + [ + "profiling.sampling.cli", + "attach", + "12345", + "--collapsed", + "--no-summary", + ], 
+ "summary", + ), + ] + + from profiling.sampling.cli import main + + for test_args, expected_error_keyword in test_cases: + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + mock.patch("profiling.sampling.cli.sample"), # Prevent actual profiling + self.assertRaises(SystemExit) as cm, + ): + main() + + self.assertEqual(cm.exception.code, 2) # argparse error code + error_msg = mock_stderr.getvalue() + self.assertIn("error:", error_msg) + self.assertIn("only valid with --pstats", error_msg) + + def test_cli_default_collapsed_filename(self): + """Test that collapsed format gets a default filename when not specified.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--collapsed"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + # Check that sample was called (exact filename depends on implementation) + mock_sample.assert_called_once() + + def test_cli_custom_output_filenames(self): + """Test custom output filenames for both formats.""" + test_cases = [ + ( + [ + "profiling.sampling.cli", + "attach", + "12345", + "--pstats", + "-o", + "custom.pstats", + ], + "custom.pstats", + "pstats", + ), + ( + [ + "profiling.sampling.cli", + "attach", + "12345", + "--collapsed", + "-o", + "custom.txt", + ], + "custom.txt", + "collapsed", + ), + ] + + from profiling.sampling.cli import main + + for test_args, expected_filename, expected_format in test_cases: + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + main() + + mock_sample.assert_called_once() + + def test_cli_missing_required_arguments(self): + """Test that CLI requires subcommand.""" + with ( + mock.patch("sys.argv", ["profiling.sampling.cli"]), + mock.patch("sys.stderr", io.StringIO()), + ): + with self.assertRaises(SystemExit): + from profiling.sampling.cli import main + main() + + def test_cli_mutually_exclusive_format_options(self): + """Test that pstats and collapsed options are mutually exclusive.""" + with ( + mock.patch( + "sys.argv", + [ + "profiling.sampling.cli", + "attach", + "12345", + "--pstats", + "--collapsed", + ], + ), + mock.patch("sys.stderr", io.StringIO()), + ): + with self.assertRaises(SystemExit): + from profiling.sampling.cli import main + main() + + def test_argument_parsing_basic(self): + test_args = ["profiling.sampling.cli", "attach", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + + def test_sort_options(self): + from profiling.sampling.cli import main + + sort_options = [ + ("nsamples", 0), + ("tottime", 1), + ("cumtime", 2), + ("sample-pct", 3), + ("cumul-pct", 4), + ("name", -1), + ] + + for option, expected_sort_value in sort_options: + test_args = ["profiling.sampling.cli", "attach", "12345", "--sort", option] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + mock_sample.reset_mock() + + def test_async_aware_flag_defaults_to_running(self): + """Test --async-aware flag enables async profiling with default 'running' mode.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware"] + + with ( + mock.patch("sys.argv", test_args), + 
mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + # Verify async_aware was passed with default "running" mode + call_kwargs = mock_sample.call_args[1] + self.assertEqual(call_kwargs.get("async_aware"), "running") + + def test_async_aware_with_async_mode_all(self): + """Test --async-aware with --async-mode all.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--async-mode", "all"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + call_kwargs = mock_sample.call_args[1] + self.assertEqual(call_kwargs.get("async_aware"), "all") + + def test_async_aware_default_is_none(self): + """Test async_aware defaults to None when --async-aware not specified.""" + test_args = ["profiling.sampling.cli", "attach", "12345"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + from profiling.sampling.cli import main + main() + + mock_sample.assert_called_once() + call_kwargs = mock_sample.call_args[1] + self.assertIsNone(call_kwargs.get("async_aware")) + + def test_async_mode_invalid_choice(self): + """Test --async-mode with invalid choice raises error.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--async-mode", "invalid"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()), + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + + def test_async_mode_requires_async_aware(self): + """Test --async-mode without --async-aware raises error.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-mode", "all"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--async-mode requires --async-aware", error_msg) + + def test_async_aware_incompatible_with_native(self): + """Test --async-aware is incompatible with --native.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--native"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--native", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) + + def test_async_aware_incompatible_with_no_gc(self): + """Test --async-aware is incompatible with --no-gc.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--no-gc"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--no-gc", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) + + def 
test_async_aware_incompatible_with_both_native_and_no_gc(self): + """Test --async-aware is incompatible with both --native and --no-gc.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--native", "--no-gc"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--native", error_msg) + self.assertIn("--no-gc", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) + + def test_async_aware_incompatible_with_mode(self): + """Test --async-aware is incompatible with --mode (non-wall).""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--mode", "cpu"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--mode=cpu", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) + + def test_async_aware_incompatible_with_all_threads(self): + """Test --async-aware is incompatible with --all-threads.""" + test_args = ["profiling.sampling.cli", "attach", "12345", "--async-aware", "--all-threads"] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("--all-threads", error_msg) + self.assertIn("incompatible with --async-aware", error_msg) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py new file mode 100644 index 00000000000..e8c12c22215 --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_collectors.py @@ -0,0 +1,1203 @@ +"""Tests for sampling profiler collector components.""" + +import json +import marshal +import os +import tempfile +import unittest + +try: + import _remote_debugging # noqa: F401 + from profiling.sampling.pstats_collector import PstatsCollector + from profiling.sampling.stack_collector import ( + CollapsedStackCollector, + FlamegraphCollector, + ) + from profiling.sampling.gecko_collector import GeckoCollector + from profiling.sampling.constants import ( + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + ) + from _remote_debugging import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_GIL_REQUESTED, + ) +except ImportError: + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) + +from test.support import captured_stdout, captured_stderr + +from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo +from .helpers import close_and_unlink + + +class TestSampleProfilerComponents(unittest.TestCase): + """Unit tests for individual profiler components.""" + + def test_mock_frame_info_with_empty_and_unicode_values(self): + """Test MockFrameInfo handles empty strings, unicode characters, and very long names correctly.""" + # Test with empty strings + frame = MockFrameInfo("", 0, "") + self.assertEqual(frame.filename, "") + self.assertEqual(frame.lineno, 0) + 
self.assertEqual(frame.funcname, "") + self.assertIn("filename=''", repr(frame)) + + # Test with unicode characters + frame = MockFrameInfo("文件.py", 42, "函数名") + self.assertEqual(frame.filename, "文件.py") + self.assertEqual(frame.funcname, "函数名") + + # Test with very long names + long_filename = "x" * 1000 + ".py" + long_funcname = "func_" + "x" * 1000 + frame = MockFrameInfo(long_filename, 999999, long_funcname) + self.assertEqual(frame.filename, long_filename) + self.assertEqual(frame.lineno, 999999) + self.assertEqual(frame.funcname, long_funcname) + + def test_pstats_collector_with_extreme_intervals_and_empty_data(self): + """Test PstatsCollector handles zero/large intervals, empty frames, None thread IDs, and duplicate frames.""" + # Test with zero interval + collector = PstatsCollector(sample_interval_usec=0) + self.assertEqual(collector.sample_interval_usec, 0) + + # Test with very large interval + collector = PstatsCollector(sample_interval_usec=1000000000) + self.assertEqual(collector.sample_interval_usec, 1000000000) + + # Test collecting empty frames list + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect([]) + self.assertEqual(len(collector.result), 0) + + # Test collecting frames with None thread id + test_frames = [ + MockInterpreterInfo( + 0, + [MockThreadInfo(None, [MockFrameInfo("file.py", 10, "func")])], + ) + ] + collector.collect(test_frames) + # Should still process the frames + self.assertEqual(len(collector.result), 1) + + # Test collecting duplicate frames in same sample + test_frames = [ + MockInterpreterInfo( + 0, # interpreter_id + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 10, "func1"), # Duplicate + ], + ) + ], + ) + ] + collector = PstatsCollector(sample_interval_usec=1000) + collector.collect(test_frames) + # Should count both occurrences + self.assertEqual( + collector.result[("file.py", 10, "func1")]["cumulative_calls"], 2 + ) + + def test_pstats_collector_single_frame_stacks(self): + """Test PstatsCollector with single-frame call stacks to trigger len(frames) <= 1 branch.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Test with exactly one frame (should trigger the <= 1 condition) + single_frame = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [MockFrameInfo("single.py", 10, "single_func")] + ) + ], + ) + ] + collector.collect(single_frame) + + # Should record the single frame with inline call + self.assertEqual(len(collector.result), 1) + single_key = ("single.py", 10, "single_func") + self.assertIn(single_key, collector.result) + self.assertEqual(collector.result[single_key]["direct_calls"], 1) + self.assertEqual(collector.result[single_key]["cumulative_calls"], 1) + + # Test with empty frames (should also trigger <= 1 condition) + empty_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, [])])] + collector.collect(empty_frames) + + # Should not add any new entries + self.assertEqual( + len(collector.result), 1 + ) # Still just the single frame + + # Test mixed single and multi-frame stacks + mixed_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [MockFrameInfo("single2.py", 20, "single_func2")], + ), # Single frame + MockThreadInfo( + 2, + [ # Multi-frame stack + MockFrameInfo("multi.py", 30, "multi_func1"), + MockFrameInfo("multi.py", 40, "multi_func2"), + ], + ), + ], + ), + ] + collector.collect(mixed_frames) + + # Should have recorded all functions + self.assertEqual( + len(collector.result), 4 + ) # single + single2 + 
multi1 + multi2 + + # Verify single frame handling + single2_key = ("single2.py", 20, "single_func2") + self.assertIn(single2_key, collector.result) + self.assertEqual(collector.result[single2_key]["direct_calls"], 1) + self.assertEqual(collector.result[single2_key]["cumulative_calls"], 1) + + # Verify multi-frame handling still works + multi1_key = ("multi.py", 30, "multi_func1") + multi2_key = ("multi.py", 40, "multi_func2") + self.assertIn(multi1_key, collector.result) + self.assertIn(multi2_key, collector.result) + self.assertEqual(collector.result[multi1_key]["direct_calls"], 1) + self.assertEqual( + collector.result[multi2_key]["cumulative_calls"], 1 + ) # Called from multi1 + + def test_collapsed_stack_collector_with_empty_and_deep_stacks(self): + """Test CollapsedStackCollector handles empty frames, single-frame stacks, and very deep call stacks.""" + collector = CollapsedStackCollector(1000) + + # Test with empty frames + collector.collect([]) + self.assertEqual(len(collector.stack_counter), 0) + + # Test with single frame stack + test_frames = [ + MockInterpreterInfo( + 0, [MockThreadInfo(1, [("file.py", 10, "func")])] + ) + ] + collector.collect(test_frames) + self.assertEqual(len(collector.stack_counter), 1) + (((path, thread_id), count),) = collector.stack_counter.items() + self.assertEqual(path, (("file.py", 10, "func"),)) + self.assertEqual(thread_id, 1) + self.assertEqual(count, 1) + + # Test with very deep stack + deep_stack = [(f"file{i}.py", i, f"func{i}") for i in range(100)] + test_frames = [MockInterpreterInfo(0, [MockThreadInfo(1, deep_stack)])] + collector = CollapsedStackCollector(1000) + collector.collect(test_frames) + # One aggregated path with 100 frames (reversed) + (((path_tuple, thread_id),),) = (collector.stack_counter.keys(),) + self.assertEqual(len(path_tuple), 100) + self.assertEqual(path_tuple[0], ("file99.py", 99, "func99")) + self.assertEqual(path_tuple[-1], ("file0.py", 0, "func0")) + self.assertEqual(thread_id, 1) + + def test_pstats_collector_basic(self): + """Test basic PstatsCollector functionality.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Test empty state + self.assertEqual(len(collector.result), 0) + self.assertEqual(len(collector.stats), 0) + + # Test collecting sample data + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ], + ) + ] + collector.collect(test_frames) + + # Should have recorded calls for both functions + self.assertEqual(len(collector.result), 2) + self.assertIn(("file.py", 10, "func1"), collector.result) + self.assertIn(("file.py", 20, "func2"), collector.result) + + # Top-level function should have direct call + self.assertEqual( + collector.result[("file.py", 10, "func1")]["direct_calls"], 1 + ) + self.assertEqual( + collector.result[("file.py", 10, "func1")]["cumulative_calls"], 1 + ) + + # Calling function should have cumulative call but no direct calls + self.assertEqual( + collector.result[("file.py", 20, "func2")]["cumulative_calls"], 1 + ) + self.assertEqual( + collector.result[("file.py", 20, "func2")]["direct_calls"], 0 + ) + + def test_pstats_collector_create_stats(self): + """Test PstatsCollector stats creation.""" + collector = PstatsCollector( + sample_interval_usec=1000000 + ) # 1 second intervals + + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("file.py", 10, "func1"), + MockFrameInfo("file.py", 20, "func2"), + ], + ) + ], + 
) + ] + collector.collect(test_frames) + collector.collect(test_frames) # Collect twice + + collector.create_stats() + + # Check stats format: (direct_calls, cumulative_calls, tt, ct, callers) + func1_stats = collector.stats[("file.py", 10, "func1")] + self.assertEqual(func1_stats[0], 2) # direct_calls (top of stack) + self.assertEqual(func1_stats[1], 2) # cumulative_calls + self.assertEqual( + func1_stats[2], 2.0 + ) # tt (total time - 2 samples * 1 sec) + self.assertEqual(func1_stats[3], 2.0) # ct (cumulative time) + + func2_stats = collector.stats[("file.py", 20, "func2")] + self.assertEqual( + func2_stats[0], 0 + ) # direct_calls (never top of stack) + self.assertEqual( + func2_stats[1], 2 + ) # cumulative_calls (appears in stack) + self.assertEqual(func2_stats[2], 0.0) # tt (no direct calls) + self.assertEqual(func2_stats[3], 2.0) # ct (cumulative time) + + def test_collapsed_stack_collector_basic(self): + collector = CollapsedStackCollector(1000) + + # Test empty state + self.assertEqual(len(collector.stack_counter), 0) + + # Test collecting sample data + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + ) + ], + ) + ] + collector.collect(test_frames) + + # Should store one reversed path + self.assertEqual(len(collector.stack_counter), 1) + (((path, thread_id), count),) = collector.stack_counter.items() + expected_tree = (("file.py", 20, "func2"), ("file.py", 10, "func1")) + self.assertEqual(path, expected_tree) + self.assertEqual(thread_id, 1) + self.assertEqual(count, 1) + + def test_collapsed_stack_collector_export(self): + collapsed_out = tempfile.NamedTemporaryFile(delete=False) + self.addCleanup(close_and_unlink, collapsed_out) + + collector = CollapsedStackCollector(1000) + + test_frames1 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + ) + ], + ) + ] + test_frames2 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + ) + ], + ) + ] # Same stack + test_frames3 = [ + MockInterpreterInfo( + 0, [MockThreadInfo(1, [("other.py", 5, "other_func")])] + ) + ] + + collector.collect(test_frames1) + collector.collect(test_frames2) + collector.collect(test_frames3) + + with captured_stdout(), captured_stderr(): + collector.export(collapsed_out.name) + # Check file contents + with open(collapsed_out.name, "r") as f: + content = f.read() + + lines = content.strip().split("\n") + self.assertEqual(len(lines), 2) # Two unique stacks + + # Check collapsed format: tid:X;file:func:line;file:func:line count + stack1_expected = "tid:1;file.py:func2:20;file.py:func1:10 2" + stack2_expected = "tid:1;other.py:other_func:5 1" + + self.assertIn(stack1_expected, lines) + self.assertIn(stack2_expected, lines) + + def test_flamegraph_collector_basic(self): + """Test basic FlamegraphCollector functionality.""" + collector = FlamegraphCollector(1000) + + # Empty collector should produce 'No Data' + data = collector._convert_to_flamegraph_format() + # With string table, name is now an index - resolve it using the strings array + strings = data.get("strings", []) + name_index = data.get("name", 0) + resolved_name = ( + strings[name_index] + if isinstance(name_index, int) and 0 <= name_index < len(strings) + else str(name_index) + ) + self.assertIn(resolved_name, ("No Data", "No significant data")) + + # Test collecting sample data + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, 
[("file.py", 10, "func1"), ("file.py", 20, "func2")] + ) + ], + ) + ] + collector.collect(test_frames) + + # Convert and verify structure: func2 -> func1 with counts = 1 + data = collector._convert_to_flamegraph_format() + # Expect promotion: root is the single child (func2), with func1 as its only child + strings = data.get("strings", []) + name_index = data.get("name", 0) + name = ( + strings[name_index] + if isinstance(name_index, int) and 0 <= name_index < len(strings) + else str(name_index) + ) + self.assertIsInstance(name, str) + self.assertTrue(name.startswith("Program Root: ")) + self.assertIn("func2 (file.py:20)", name) # formatted name + children = data.get("children", []) + self.assertEqual(len(children), 1) + child = children[0] + child_name_index = child.get("name", 0) + child_name = ( + strings[child_name_index] + if isinstance(child_name_index, int) + and 0 <= child_name_index < len(strings) + else str(child_name_index) + ) + self.assertIn("func1 (file.py:10)", child_name) # formatted name + self.assertEqual(child["value"], 1) + + def test_flamegraph_collector_export(self): + """Test flamegraph HTML export functionality.""" + flamegraph_out = tempfile.NamedTemporaryFile( + suffix=".html", delete=False + ) + self.addCleanup(close_and_unlink, flamegraph_out) + + collector = FlamegraphCollector(1000) + + # Create some test data (use Interpreter/Thread objects like runtime) + test_frames1 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + ) + ], + ) + ] + test_frames2 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, [("file.py", 10, "func1"), ("file.py", 20, "func2")] + ) + ], + ) + ] # Same stack + test_frames3 = [ + MockInterpreterInfo( + 0, [MockThreadInfo(1, [("other.py", 5, "other_func")])] + ) + ] + + collector.collect(test_frames1) + collector.collect(test_frames2) + collector.collect(test_frames3) + + # Export flamegraph + with captured_stdout(), captured_stderr(): + collector.export(flamegraph_out.name) + + # Verify file was created and contains valid data + self.assertTrue(os.path.exists(flamegraph_out.name)) + self.assertGreater(os.path.getsize(flamegraph_out.name), 0) + + # Check file contains HTML content + with open(flamegraph_out.name, "r", encoding="utf-8") as f: + content = f.read() + + # Should be valid HTML + self.assertIn("", content.lower()) + self.assertIn("")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames_gc) + self.assertEqual(collector.samples_with_gc_frames, 1) + + # Another sample without GC + collector.collect(stack_frames_1) + self.assertEqual(collector.samples_with_gc_frames, 1) # Still 1 + + # Another GC sample + collector.collect(stack_frames_gc) + self.assertEqual(collector.samples_with_gc_frames, 2) + + def test_flamegraph_collector_per_thread_stats(self): + """Test per-thread statistics tracking in FlamegraphCollector.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Multiple threads with different states + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + MockThreadInfo(3, [("c.py", 3, "func_c")], status=THREAD_STATUS_GIL_REQUESTED), + ], + ) + ] + collector.collect(stack_frames) + + # Check per-thread stats + self.assertIn(1, collector.per_thread_stats) + self.assertIn(2, collector.per_thread_stats) + self.assertIn(3, collector.per_thread_stats) + + # Thread 1: has GIL 
+ self.assertEqual(collector.per_thread_stats[1]["has_gil"], 1) + self.assertEqual(collector.per_thread_stats[1]["on_cpu"], 0) + self.assertEqual(collector.per_thread_stats[1]["total"], 1) + + # Thread 2: on CPU + self.assertEqual(collector.per_thread_stats[2]["has_gil"], 0) + self.assertEqual(collector.per_thread_stats[2]["on_cpu"], 1) + self.assertEqual(collector.per_thread_stats[2]["total"], 1) + + # Thread 3: waiting + self.assertEqual(collector.per_thread_stats[3]["gil_requested"], 1) + self.assertEqual(collector.per_thread_stats[3]["total"], 1) + + # Test accumulation across samples + stack_frames_2 = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames_2) + + self.assertEqual(collector.per_thread_stats[1]["has_gil"], 1) + self.assertEqual(collector.per_thread_stats[1]["on_cpu"], 1) + self.assertEqual(collector.per_thread_stats[1]["total"], 2) + + def test_flamegraph_collector_percentage_calculations(self): + """Test that percentage calculations are correct in exported data.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Create scenario: 60% GIL held, 40% not held + for i in range(6): + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames) + + for i in range(4): + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames) + + # Export to get calculated percentages + data = collector._convert_to_flamegraph_format() + thread_stats = data["stats"]["thread_stats"] + + self.assertAlmostEqual(thread_stats["has_gil_pct"], 60.0, places=1) + self.assertAlmostEqual(thread_stats["on_cpu_pct"], 40.0, places=1) + self.assertEqual(thread_stats["total"], 10) + + def test_flamegraph_collector_mode_handling(self): + """Test that profiling mode is correctly passed through to exported data.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Collect some data + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames) + + # Set stats with mode + collector.set_stats( + sample_interval_usec=1000, + duration_sec=1.0, + sample_rate=1000.0, + mode=PROFILING_MODE_CPU + ) + + data = collector._convert_to_flamegraph_format() + self.assertEqual(data["stats"]["mode"], PROFILING_MODE_CPU) + + def test_flamegraph_collector_zero_samples_edge_case(self): + """Test that collector handles zero samples gracefully.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Export without collecting any samples + data = collector._convert_to_flamegraph_format() + + # Should return a valid structure with no data + self.assertIn("name", data) + self.assertEqual(data["value"], 0) + self.assertIn("children", data) + self.assertEqual(len(data["children"]), 0) + + def test_flamegraph_collector_json_structure_includes_stats(self): + """Test that exported JSON includes thread_stats and per_thread_stats.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Collect some data with multiple threads + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + 
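+ # stack_frames mirrors the shape the unwinder hands to collectors: a
+ # list of interpreters, each with a list of threads, each with its
+ # frame stack ordered innermost (currently executing) frame first.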
collector.collect(stack_frames) + + # Set stats + collector.set_stats( + sample_interval_usec=1000, + duration_sec=1.0, + sample_rate=1000.0, + mode=PROFILING_MODE_WALL + ) + + # Export and verify structure + data = collector._convert_to_flamegraph_format() + + # Check that stats object exists and contains expected fields + self.assertIn("stats", data) + stats = data["stats"] + + # Verify thread_stats exists and has expected structure + self.assertIn("thread_stats", stats) + thread_stats = stats["thread_stats"] + self.assertIn("has_gil_pct", thread_stats) + self.assertIn("on_cpu_pct", thread_stats) + self.assertIn("gil_requested_pct", thread_stats) + self.assertIn("gc_pct", thread_stats) + self.assertIn("total", thread_stats) + + # Verify per_thread_stats exists and has data for both threads + self.assertIn("per_thread_stats", stats) + per_thread_stats = stats["per_thread_stats"] + self.assertIn(1, per_thread_stats) + self.assertIn(2, per_thread_stats) + + # Check per-thread structure + for thread_id in [1, 2]: + thread_data = per_thread_stats[thread_id] + self.assertIn("has_gil_pct", thread_data) + self.assertIn("on_cpu_pct", thread_data) + self.assertIn("gil_requested_pct", thread_data) + self.assertIn("gc_pct", thread_data) + self.assertIn("total", thread_data) + + def test_flamegraph_collector_per_thread_gc_percentage(self): + """Test that per-thread GC percentage uses total samples as denominator.""" + collector = FlamegraphCollector(sample_interval_usec=1000) + + # Create 10 samples total: + # - Thread 1 appears in all 10 samples, has GC in 2 of them + # - Thread 2 appears in only 6 samples, has GC in 1 of them + + # First 5 samples: both threads, thread 1 has GC in 2 + for i in range(5): + has_gc = i < 2 # First 2 samples have GC for thread 1 + frames_1 = [("~", 0, "<GC>")] if has_gc else [("a.py", 1, "func_a")] + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, frames_1, status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("b.py", 2, "func_b")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + collector.collect(stack_frames) + + # Next 5 samples: thread 1 alone, except the first, where thread 2 + # also appears with a GC frame + for i in range(5): + if i == 0: + # Thread 2 appears in this sample with GC + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + MockThreadInfo(2, [("~", 0, "<GC>")], status=THREAD_STATUS_ON_CPU), + ], + ) + ] + else: + # Only thread 1 + stack_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo(1, [("a.py", 1, "func_a")], status=THREAD_STATUS_HAS_GIL), + ], + ) + ] + collector.collect(stack_frames) + + # Set stats and export + collector.set_stats( + sample_interval_usec=1000, + duration_sec=1.0, + sample_rate=1000.0, + mode=PROFILING_MODE_WALL + ) + + data = collector._convert_to_flamegraph_format() + per_thread_stats = data["stats"]["per_thread_stats"] + + # Thread 1: appeared in 10 samples, had GC in 2 + # GC percentage should be 2/10 = 20% (using total samples, not thread appearances) + self.assertEqual(collector.per_thread_stats[1]["gc_samples"], 2) + self.assertEqual(collector.per_thread_stats[1]["total"], 10) + self.assertAlmostEqual(per_thread_stats[1]["gc_pct"], 20.0, places=1) + + # Thread 2: appeared in 6 samples, had GC in 1 + # GC percentage should be 1/10 = 10% (using total samples, not thread appearances) + self.assertEqual(collector.per_thread_stats[2]["gc_samples"], 1) + self.assertEqual(collector.per_thread_stats[2]["total"], 6) +
self.assertAlmostEqual(per_thread_stats[2]["gc_pct"], 10.0, places=1) diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_integration.py b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py new file mode 100644 index 00000000000..e92b3f45fbc --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_integration.py @@ -0,0 +1,926 @@ +"""Tests for sampling profiler integration and error handling.""" + +import contextlib +import io +import marshal +import os +import shutil +import subprocess +import sys +import tempfile +import unittest +from unittest import mock + +try: + import _remote_debugging + import profiling.sampling + import profiling.sampling.sample + from profiling.sampling.pstats_collector import PstatsCollector + from profiling.sampling.stack_collector import CollapsedStackCollector + from profiling.sampling.sample import SampleProfiler +except ImportError: + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) + +from test.support import ( + requires_subprocess, + SHORT_TIMEOUT, +) + +from .helpers import ( + test_subprocess, + close_and_unlink, + skip_if_not_supported, + PROCESS_VM_READV_SUPPORTED, +) +from .mocks import MockFrameInfo, MockThreadInfo, MockInterpreterInfo + +# Duration for profiling tests - long enough for process to complete naturally +PROFILING_TIMEOUT = str(int(SHORT_TIMEOUT)) + +# Duration for profiling in tests - short enough to complete quickly +PROFILING_DURATION_SEC = 2 + + +@skip_if_not_supported +@unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", +) +class TestRecursiveFunctionProfiling(unittest.TestCase): + """Test profiling of recursive functions and complex call patterns.""" + + def test_recursive_function_call_counting(self): + """Test that recursive function calls are counted correctly.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Simulate a recursive call pattern: fibonacci(5) calling itself + recursive_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ # First sample: deep in recursion + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo( + "fib.py", 10, "fibonacci" + ), # recursive call + MockFrameInfo( + "fib.py", 10, "fibonacci" + ), # deeper recursion + MockFrameInfo( + "fib.py", 10, "fibonacci" + ), # even deeper + MockFrameInfo("main.py", 5, "main"), # main caller + ], + ) + ], + ), + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ # Second sample: different recursion depth + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo( + "fib.py", 10, "fibonacci" + ), # recursive call + MockFrameInfo("main.py", 5, "main"), # main caller + ], + ) + ], + ), + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ # Third sample: back to deeper recursion + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("fib.py", 10, "fibonacci"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + ] + + for frames in recursive_frames: + collector.collect([frames]) + + collector.create_stats() + + # Check that recursive calls are counted properly + fib_key = ("fib.py", 10, "fibonacci") + main_key = ("main.py", 5, "main") + + self.assertIn(fib_key, collector.stats) + self.assertIn(main_key, collector.stats) + + # Fibonacci should have many calls due to recursion + fib_stats = collector.stats[fib_key] + direct_calls, cumulative_calls, tt, ct, callers = fib_stats + + # Should have 
recorded multiple calls (9 total appearances in samples) + self.assertEqual(cumulative_calls, 9) + self.assertGreater(tt, 0) # Should have some total time + self.assertGreater(ct, 0) # Should have some cumulative time + + # Main should have fewer calls + main_stats = collector.stats[main_key] + main_direct_calls, main_cumulative_calls = main_stats[0], main_stats[1] + self.assertEqual(main_direct_calls, 0) # Never directly executing + self.assertEqual(main_cumulative_calls, 3) # Appears in all 3 samples + + def test_nested_function_hierarchy(self): + """Test profiling of deeply nested function calls.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Simulate a deep call hierarchy + deep_call_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("level1.py", 10, "level1_func"), + MockFrameInfo("level2.py", 20, "level2_func"), + MockFrameInfo("level3.py", 30, "level3_func"), + MockFrameInfo("level4.py", 40, "level4_func"), + MockFrameInfo("level5.py", 50, "level5_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ # Same hierarchy sampled again + MockFrameInfo("level1.py", 10, "level1_func"), + MockFrameInfo("level2.py", 20, "level2_func"), + MockFrameInfo("level3.py", 30, "level3_func"), + MockFrameInfo("level4.py", 40, "level4_func"), + MockFrameInfo("level5.py", 50, "level5_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + ] + + for frames in deep_call_frames: + collector.collect([frames]) + + collector.create_stats() + + # All levels should be recorded + for level in range(1, 6): + key = (f"level{level}.py", level * 10, f"level{level}_func") + self.assertIn(key, collector.stats) + + stats = collector.stats[key] + direct_calls, cumulative_calls, tt, ct, callers = stats + + # Each level should appear in stack twice (2 samples) + self.assertEqual(cumulative_calls, 2) + + # Only level1 (deepest) should have direct calls + if level == 1: + self.assertEqual(direct_calls, 2) + else: + self.assertEqual(direct_calls, 0) + + # Deeper levels should have lower cumulative time than higher levels + # (since they don't include time from functions they call) + if level == 1: # Deepest level with most time + self.assertGreater(ct, 0) + + def test_alternating_call_patterns(self): + """Test profiling with alternating call patterns.""" + collector = PstatsCollector(sample_interval_usec=1000) + + # Simulate alternating execution paths + pattern_frames = [ + # Pattern A: path through func_a + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("module.py", 10, "func_a"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + # Pattern B: path through func_b + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("module.py", 20, "func_b"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + # Pattern A again + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("module.py", 10, "func_a"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + # Pattern B again + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + MockFrameInfo("module.py", 20, "func_b"), + MockFrameInfo("module.py", 30, "shared_func"), + MockFrameInfo("main.py", 5, "main"), + ], + ) + ], + ), + ] + + for frames in pattern_frames: + collector.collect([frames]) + + 
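+ # create_stats() folds the raw counters into pstats-style tuples
+ # (direct_calls, cumulative_calls, tt, ct, callers); with a fixed
+ # sampling interval the times are just the counters scaled by the
+ # interval in seconds, e.g. 4 cumulative samples at 1000 usec give
+ # a ct of 0.004s.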
collector.create_stats() + + # Check that both paths are recorded equally + func_a_key = ("module.py", 10, "func_a") + func_b_key = ("module.py", 20, "func_b") + shared_key = ("module.py", 30, "shared_func") + main_key = ("main.py", 5, "main") + + # func_a and func_b should each be directly executing twice + self.assertEqual(collector.stats[func_a_key][0], 2) # direct_calls + self.assertEqual(collector.stats[func_a_key][1], 2) # cumulative_calls + self.assertEqual(collector.stats[func_b_key][0], 2) # direct_calls + self.assertEqual(collector.stats[func_b_key][1], 2) # cumulative_calls + + # shared_func should appear in all samples (4 times) but never directly executing + self.assertEqual(collector.stats[shared_key][0], 0) # direct_calls + self.assertEqual(collector.stats[shared_key][1], 4) # cumulative_calls + + # main should appear in all samples but never directly executing + self.assertEqual(collector.stats[main_key][0], 0) # direct_calls + self.assertEqual(collector.stats[main_key][1], 4) # cumulative_calls + + def test_collapsed_stack_with_recursion(self): + """Test collapsed stack collector with recursive patterns.""" + collector = CollapsedStackCollector(1000) + + # Recursive call pattern + recursive_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + ("factorial.py", 10, "factorial"), + ("factorial.py", 10, "factorial"), # recursive + ("factorial.py", 10, "factorial"), # deeper + ("main.py", 5, "main"), + ], + ) + ], + ), + MockInterpreterInfo( + 0, + [ + MockThreadInfo( + 1, + [ + ("factorial.py", 10, "factorial"), + ( + "factorial.py", + 10, + "factorial", + ), # different depth + ("main.py", 5, "main"), + ], + ) + ], + ), + ] + + for frames in recursive_frames: + collector.collect([frames]) + + # Should capture both call paths + self.assertEqual(len(collector.stack_counter), 2) + + # First path should be longer (deeper recursion) than the second + path_tuples = list(collector.stack_counter.keys()) + paths = [p[0] for p in path_tuples] # Extract just the call paths + lengths = [len(p) for p in paths] + self.assertNotEqual(lengths[0], lengths[1]) + + # Both should contain factorial calls + self.assertTrue( + any(any(f[2] == "factorial" for f in p) for p in paths) + ) + + # Verify total occurrences via aggregation + factorial_key = ("factorial.py", 10, "factorial") + main_key = ("main.py", 5, "main") + + def total_occurrences(func): + total = 0 + for (path, thread_id), count in collector.stack_counter.items(): + total += sum(1 for f in path if f == func) * count + return total + + self.assertEqual(total_occurrences(factorial_key), 5) + self.assertEqual(total_occurrences(main_key), 2) + + +# Shared workload functions for test scripts +_WORKLOAD_FUNCTIONS = ''' +def slow_fibonacci(n): + if n <= 1: + return n + return slow_fibonacci(n-1) + slow_fibonacci(n-2) + +def cpu_intensive_work(): + result = 0 + for i in range(10000): + result += i * i + if i % 100 == 0: + result = result % 1000000 + return result + +def do_work(): + iteration = 0 + while True: + if iteration % 2 == 0: + slow_fibonacci(15) + else: + cpu_intensive_work() + iteration += 1 +''' + + +@requires_subprocess() +@skip_if_not_supported +class TestSampleProfilerIntegration(unittest.TestCase): + @classmethod + def setUpClass(cls): + # Test script for use with test_subprocess() - signals when work starts + cls.test_script = _WORKLOAD_FUNCTIONS + ''' +_test_sock.sendall(b"working") +do_work() +''' + # CLI test script - runs for fixed duration (no socket sync) + cls.cli_test_script = ''' +import time +''' 
+ _WORKLOAD_FUNCTIONS.replace( + 'while True:', 'end_time = time.time() + 30\n while time.time() < end_time:' +) + ''' +do_work() +''' + + def test_sampling_basic_functionality(self): + with ( + test_subprocess(self.test_script, wait_for_working=True) as subproc, + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=1000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=PROFILING_DURATION_SEC, + ) + collector.print_stats(show_summary=False) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # Basic checks on output + self.assertIn("Captured", output) + self.assertIn("samples", output) + self.assertIn("Profile Stats", output) + + # Should see some of our test functions + self.assertIn("slow_fibonacci", output) + + def test_sampling_with_pstats_export(self): + pstats_out = tempfile.NamedTemporaryFile( + suffix=".pstats", delete=False + ) + self.addCleanup(close_and_unlink, pstats_out) + + with test_subprocess(self.test_script, wait_for_working=True) as subproc: + # Suppress profiler output when testing file export + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=10000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=PROFILING_DURATION_SEC, + ) + collector.export(pstats_out.name) + except PermissionError: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + # Verify file was created and contains valid data + self.assertTrue(os.path.exists(pstats_out.name)) + self.assertGreater(os.path.getsize(pstats_out.name), 0) + + # Try to load the stats file + with open(pstats_out.name, "rb") as f: + stats_data = marshal.load(f) + + # Should be a dictionary with the sampled marker + self.assertIsInstance(stats_data, dict) + self.assertIn(("__sampled__",), stats_data) + self.assertTrue(stats_data[("__sampled__",)]) + + # Should have some function data + function_entries = [ + k for k in stats_data.keys() if k != ("__sampled__",) + ] + self.assertGreater(len(function_entries), 0) + + def test_sampling_with_collapsed_export(self): + collapsed_file = tempfile.NamedTemporaryFile( + suffix=".txt", delete=False + ) + self.addCleanup(close_and_unlink, collapsed_file) + + with ( + test_subprocess(self.test_script, wait_for_working=True) as subproc, + ): + # Suppress profiler output when testing file export + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = CollapsedStackCollector(1000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=PROFILING_DURATION_SEC, + ) + collector.export(collapsed_file.name) + except PermissionError: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + # Verify file was created and contains valid data + self.assertTrue(os.path.exists(collapsed_file.name)) + self.assertGreater(os.path.getsize(collapsed_file.name), 0) + + # Check file format + with open(collapsed_file.name, "r") as f: + content = f.read() + + lines = content.strip().split("\n") + self.assertGreater(len(lines), 0) + + # Each line should have format: stack_trace count + for line in lines: + parts = line.rsplit(" ", 1) + self.assertEqual(len(parts), 2) + + stack_trace, count_str = 
parts + self.assertGreater(len(stack_trace), 0) + self.assertTrue(count_str.isdigit()) + self.assertGreater(int(count_str), 0) + + # Stack trace should contain semicolon-separated entries + if ";" in stack_trace: + stack_parts = stack_trace.split(";") + for part in stack_parts: + # Each part should be file:function:line + self.assertIn(":", part) + + def test_sampling_all_threads(self): + with ( + test_subprocess(self.test_script, wait_for_working=True) as subproc, + # Suppress profiler output + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=10000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=PROFILING_DURATION_SEC, + all_threads=True, + ) + collector.print_stats(show_summary=False) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + # Just verify that sampling completed without error + # We're not testing output format here + + def test_sample_target_script(self): + script_file = tempfile.NamedTemporaryFile(delete=False) + script_file.write(self.cli_test_script.encode("utf-8")) + script_file.flush() + self.addCleanup(close_and_unlink, script_file) + + # Sample for PROFILING_DURATION_SEC seconds + test_args = [ + "profiling.sampling.sample", "run", + "-d", str(PROFILING_DURATION_SEC), + script_file.name + ] + + with ( + mock.patch("sys.argv", test_args), + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + from profiling.sampling.cli import main + main() + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # Basic checks on output + self.assertIn("Captured", output) + self.assertIn("samples", output) + self.assertIn("Profile Stats", output) + + # Should see some of our test functions + self.assertIn("slow_fibonacci", output) + + def test_sample_target_module(self): + tempdir = tempfile.TemporaryDirectory(delete=False) + self.addCleanup(lambda x: shutil.rmtree(x), tempdir.name) + + module_path = os.path.join(tempdir.name, "test_module.py") + + with open(module_path, "w") as f: + f.write(self.cli_test_script) + + test_args = [ + "profiling.sampling.cli", + "run", + "-d", + str(PROFILING_DURATION_SEC), + "-m", + "test_module", + ] + + with ( + mock.patch("sys.argv", test_args), + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + # Change to temp directory so subprocess can find the module + contextlib.chdir(tempdir.name), + ): + try: + from profiling.sampling.cli import main + main() + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # Basic checks on output + self.assertIn("Captured", output) + self.assertIn("samples", output) + self.assertIn("Profile Stats", output) + + # Should see some of our test functions + self.assertIn("slow_fibonacci", output) + + +@skip_if_not_supported +@unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", +) +class TestSampleProfilerErrorHandling(unittest.TestCase): + def test_invalid_pid(self): + with self.assertRaises((OSError, RuntimeError)): + collector = PstatsCollector(sample_interval_usec=100, skip_idle=False) + profiling.sampling.sample.sample(-1, collector, duration_sec=1) + + def test_process_dies_during_sampling(self): + # Use 
wait_for_working=False since this simple script doesn't send "working" + with test_subprocess( + "import time; time.sleep(0.5); exit()", + wait_for_working=False + ) as subproc: + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=50000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2, # Longer than process lifetime + ) + except PermissionError: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + output = captured_output.getvalue() + + self.assertIn("Error rate", output) + + def test_is_process_running(self): + # Use wait_for_working=False since this simple script doesn't send "working" + with test_subprocess( + "import time; time.sleep(1000)", + wait_for_working=False + ) as subproc: + try: + profiler = SampleProfiler( + pid=subproc.process.pid, + sample_interval_usec=1000, + all_threads=False, + ) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + self.assertTrue(profiler._is_process_running()) + self.assertIsNotNone(profiler.unwinder.get_stack_trace()) + subproc.process.kill() + subproc.process.wait() + self.assertRaises( + ProcessLookupError, profiler.unwinder.get_stack_trace + ) + + # Exit the context manager to ensure the process is terminated + self.assertFalse(profiler._is_process_running()) + self.assertRaises( + ProcessLookupError, profiler.unwinder.get_stack_trace + ) + + @unittest.skipUnless(sys.platform == "linux", "Only valid on Linux") + def test_esrch_signal_handling(self): + # Use wait_for_working=False since this simple script doesn't send "working" + with test_subprocess( + "import time; time.sleep(1000)", + wait_for_working=False + ) as subproc: + try: + unwinder = _remote_debugging.RemoteUnwinder( + subproc.process.pid + ) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + initial_trace = unwinder.get_stack_trace() + self.assertIsNotNone(initial_trace) + + subproc.process.kill() + + # Wait for the process to die and try to get another trace + subproc.process.wait() + + with self.assertRaises(ProcessLookupError): + unwinder.get_stack_trace() + + def test_script_error_treatment(self): + script_file = tempfile.NamedTemporaryFile( + "w", delete=False, suffix=".py" + ) + script_file.write("open('nonexistent_file.txt')\n") + script_file.close() + self.addCleanup(os.unlink, script_file.name) + + result = subprocess.run( + [ + sys.executable, + "-m", + "profiling.sampling.cli", + "run", + "-d", + "1", + script_file.name, + ], + capture_output=True, + text=True, + ) + output = result.stdout + result.stderr + + if "PermissionError" in output: + self.skipTest("Insufficient permissions for remote profiling") + self.assertNotIn("Script file not found", output) + self.assertIn( + "No such file or directory: 'nonexistent_file.txt'", output + ) + + def test_live_incompatible_with_pstats_options(self): + """Test that --live is incompatible with individual pstats options.""" + test_cases = [ + (["--sort", "tottime"], "--sort"), + (["--limit", "30"], "--limit"), + (["--no-summary"], "--no-summary"), + ] + + for args, expected_flag in test_cases: + with self.subTest(args=args): + test_args = ["profiling.sampling.cli", "run", "--live"] + args + ["test.py"] + with mock.patch("sys.argv", test_args): + with self.assertRaises(SystemExit) as cm: + from profiling.sampling.cli import main + main() + 
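+ # If the conflict is reported through argparse (exit code 2, as the
+ # CLI option tests elsewhere in this patch assert), this passes;
+ # requiring only a nonzero code keeps the test agnostic about the
+ # exact error path.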
self.assertNotEqual(cm.exception.code, 0) + + def test_live_incompatible_with_multiple_pstats_options(self): + """Test that --live is incompatible with multiple pstats options.""" + test_args = [ + "profiling.sampling.cli", "run", "--live", + "--sort", "cumtime", "--limit", "25", "--no-summary", "test.py" + ] + + with mock.patch("sys.argv", test_args): + with self.assertRaises(SystemExit) as cm: + from profiling.sampling.cli import main + main() + self.assertNotEqual(cm.exception.code, 0) + + def test_live_incompatible_with_pstats_default_values(self): + """Test that --live blocks pstats options even with default values.""" + # Test with --sort=nsamples (the default value) + test_args = ["profiling.sampling.cli", "run", "--live", "--sort=nsamples", "test.py"] + + with mock.patch("sys.argv", test_args): + with self.assertRaises(SystemExit) as cm: + from profiling.sampling.cli import main + main() + self.assertNotEqual(cm.exception.code, 0) + + # Test with --limit=15 (the default value) + test_args = ["profiling.sampling.cli", "run", "--live", "--limit=15", "test.py"] + + with mock.patch("sys.argv", test_args): + with self.assertRaises(SystemExit) as cm: + from profiling.sampling.cli import main + main() + self.assertNotEqual(cm.exception.code, 0) + + +@requires_subprocess() +@skip_if_not_supported +@unittest.skipIf( + sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, + "Test only runs on Linux with process_vm_readv support", +) +class TestAsyncAwareProfilingIntegration(unittest.TestCase): + """Integration tests for async-aware profiling mode.""" + + @classmethod + def setUpClass(cls): + # Async test script that runs indefinitely until killed. + # Sends "working" signal AFTER tasks are created and scheduled. + cls.async_script = ''' +import asyncio + +async def sleeping_leaf(): + while True: + await asyncio.sleep(0.02) + +async def cpu_leaf(): + total = 0 + while True: + for i in range(10000): + total += i * i + await asyncio.sleep(0) + +async def supervisor(): + tasks = [ + asyncio.create_task(sleeping_leaf(), name="Sleeper-0"), + asyncio.create_task(sleeping_leaf(), name="Sleeper-1"), + asyncio.create_task(sleeping_leaf(), name="Sleeper-2"), + asyncio.create_task(cpu_leaf(), name="Worker"), + ] + await asyncio.sleep(0) # Let tasks get scheduled + _test_sock.sendall(b"working") + await asyncio.gather(*tasks) + +asyncio.run(supervisor()) +''' + + def _collect_async_samples(self, async_aware_mode): + """Helper to collect samples and count function occurrences. + + Returns a dict mapping function names to their sample counts. + """ + with test_subprocess(self.async_script, wait_for_working=True) as subproc: + try: + collector = CollapsedStackCollector(1000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=PROFILING_DURATION_SEC, + async_aware=async_aware_mode, + ) + except PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + # Count samples per function from collapsed stacks + # stack_counter keys are (call_tree, thread_id) where call_tree + # is a tuple of (file, line, func) tuples + func_samples = {} + total = 0 + for (call_tree, _thread_id), count in collector.stack_counter.items(): + total += count + for _file, _line, func in call_tree: + func_samples[func] = func_samples.get(func, 0) + count + + func_samples["_total"] = total + return func_samples + + def test_async_aware_all_sees_sleeping_and_running_tasks(self): + """Test that async_aware='all' captures both sleeping and CPU-running tasks. 
+ + Task tree structure: + main + └── supervisor + ├── Sleeper-0 (sleeping_leaf) + ├── Sleeper-1 (sleeping_leaf) + ├── Sleeper-2 (sleeping_leaf) + └── Worker (cpu_leaf) + + async_aware='all' should see ALL 4 leaf tasks in the output. + """ + samples = self._collect_async_samples("all") + + self.assertGreater(samples["_total"], 0, "Should have collected samples") + self.assertIn("sleeping_leaf", samples) + self.assertIn("cpu_leaf", samples) + self.assertIn("supervisor", samples) + + def test_async_aware_running_sees_only_cpu_task(self): + """Test that async_aware='running' only captures the actively running task. + + Task tree structure: + main + └── supervisor + ├── Sleeper-0 (sleeping_leaf) - NOT visible in 'running' + ├── Sleeper-1 (sleeping_leaf) - NOT visible in 'running' + ├── Sleeper-2 (sleeping_leaf) - NOT visible in 'running' + └── Worker (cpu_leaf) - VISIBLE in 'running' + + async_aware='running' should only see the Worker task doing CPU work. + """ + samples = self._collect_async_samples("running") + + total = samples["_total"] + cpu_leaf_samples = samples.get("cpu_leaf", 0) + + self.assertGreater(total, 0, "Should have collected some samples") + self.assertGreater(cpu_leaf_samples, 0, "cpu_leaf should appear in samples") + + # cpu_leaf should have at least 90% of samples (typically 99%+) + # sleeping_leaf may occasionally appear with very few samples (< 1%) + # when tasks briefly wake up to check sleep timers + cpu_percentage = (cpu_leaf_samples / total) * 100 + self.assertGreater(cpu_percentage, 90.0, + f"cpu_leaf should dominate samples in 'running' mode, " + f"got {cpu_percentage:.1f}% ({cpu_leaf_samples}/{total})") diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py new file mode 100644 index 00000000000..04e6cd2f1fc --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_core.py @@ -0,0 +1,477 @@ +"""Core functionality tests for LiveStatsCollector. + +Tests for path simplification, frame processing, collect method, +statistics building, sorting, and formatting. 
+""" + +import os +import unittest +from test.support import requires +from test.support.import_helper import import_module + +# Only run these tests if curses is available +requires("curses") +curses = import_module("curses") + +from profiling.sampling.live_collector import LiveStatsCollector, MockDisplay +from profiling.sampling.constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, +) +from ._live_collector_helpers import ( + MockFrameInfo, + MockThreadInfo, + MockInterpreterInfo, +) + + +class TestLiveStatsCollectorPathSimplification(unittest.TestCase): + """Tests for path simplification functionality.""" + + def test_simplify_stdlib_path(self): + """Test simplification of standard library paths.""" + collector = LiveStatsCollector(1000) + # Get actual os module path + os_file = os.__file__ + if os_file: + stdlib_dir = os.path.dirname(os.path.abspath(os_file)) + test_path = os.path.join(stdlib_dir, "json", "decoder.py") + simplified = collector.simplify_path(test_path) + # Should remove the stdlib prefix + self.assertNotIn(stdlib_dir, simplified) + self.assertIn("json", simplified) + + def test_simplify_unknown_path(self): + """Test that unknown paths are returned unchanged.""" + collector = LiveStatsCollector(1000) + test_path = "/some/unknown/path/file.py" + simplified = collector.simplify_path(test_path) + self.assertEqual(simplified, test_path) + + +class TestLiveStatsCollectorFrameProcessing(unittest.TestCase): + """Tests for frame processing functionality.""" + + def test_process_single_frame(self): + """Test processing a single frame.""" + collector = LiveStatsCollector(1000) + frames = [MockFrameInfo("test.py", 10, "test_func")] + collector.process_frames(frames) + + location = ("test.py", 10, "test_func") + self.assertEqual(collector.result[location]["direct_calls"], 1) + self.assertEqual(collector.result[location]["cumulative_calls"], 1) + + def test_process_multiple_frames(self): + """Test processing a stack of multiple frames.""" + collector = LiveStatsCollector(1000) + frames = [ + MockFrameInfo("test.py", 10, "inner_func"), + MockFrameInfo("test.py", 20, "middle_func"), + MockFrameInfo("test.py", 30, "outer_func"), + ] + collector.process_frames(frames) + + # Top frame (inner_func) should have both direct and cumulative + inner_loc = ("test.py", 10, "inner_func") + self.assertEqual(collector.result[inner_loc]["direct_calls"], 1) + self.assertEqual(collector.result[inner_loc]["cumulative_calls"], 1) + + # Other frames should only have cumulative + middle_loc = ("test.py", 20, "middle_func") + self.assertEqual(collector.result[middle_loc]["direct_calls"], 0) + self.assertEqual(collector.result[middle_loc]["cumulative_calls"], 1) + + outer_loc = ("test.py", 30, "outer_func") + self.assertEqual(collector.result[outer_loc]["direct_calls"], 0) + self.assertEqual(collector.result[outer_loc]["cumulative_calls"], 1) + + def test_process_empty_frames(self): + """Test processing empty frames list.""" + collector = LiveStatsCollector(1000) + collector.process_frames([]) + # Should not raise an error and result should remain empty + self.assertEqual(len(collector.result), 0) + + def test_process_frames_accumulation(self): + """Test that multiple calls accumulate correctly.""" + collector = LiveStatsCollector(1000) + frames = [MockFrameInfo("test.py", 10, "test_func")] + + collector.process_frames(frames) + collector.process_frames(frames) + collector.process_frames(frames) + + location = ("test.py", 10, "test_func") + 
self.assertEqual(collector.result[location]["direct_calls"], 3) + self.assertEqual(collector.result[location]["cumulative_calls"], 3) + + def test_process_frames_with_thread_id(self): + """Test processing frames with per-thread tracking.""" + collector = LiveStatsCollector(1000) + frames = [MockFrameInfo("test.py", 10, "test_func")] + + # Process frames with thread_id + collector.process_frames(frames, thread_id=123) + + # Check aggregated result + location = ("test.py", 10, "test_func") + self.assertEqual(collector.result[location]["direct_calls"], 1) + self.assertEqual(collector.result[location]["cumulative_calls"], 1) + + # Check per-thread result + self.assertIn(123, collector.per_thread_data) + self.assertEqual( + collector.per_thread_data[123].result[location]["direct_calls"], 1 + ) + self.assertEqual( + collector.per_thread_data[123].result[location]["cumulative_calls"], 1 + ) + + def test_process_frames_multiple_threads(self): + """Test processing frames from multiple threads.""" + collector = LiveStatsCollector(1000) + frames1 = [MockFrameInfo("test.py", 10, "test_func")] + frames2 = [MockFrameInfo("test.py", 20, "other_func")] + + # Process frames from different threads + collector.process_frames(frames1, thread_id=123) + collector.process_frames(frames2, thread_id=456) + + # Check that both threads have their own data + self.assertIn(123, collector.per_thread_data) + self.assertIn(456, collector.per_thread_data) + + loc1 = ("test.py", 10, "test_func") + loc2 = ("test.py", 20, "other_func") + + # Thread 123 should only have func1 + self.assertEqual( + collector.per_thread_data[123].result[loc1]["direct_calls"], 1 + ) + self.assertNotIn(loc2, collector.per_thread_data[123].result) + + # Thread 456 should only have func2 + self.assertEqual( + collector.per_thread_data[456].result[loc2]["direct_calls"], 1 + ) + self.assertNotIn(loc1, collector.per_thread_data[456].result) + + +class TestLiveStatsCollectorCollect(unittest.TestCase): + """Tests for the collect method.""" + + def test_collect_initializes_start_time(self): + """Test that collect initializes start_time on first call.""" + collector = LiveStatsCollector(1000) + self.assertIsNone(collector.start_time) + + # Create mock stack frames + thread_info = MockThreadInfo(123, []) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + + collector.collect(stack_frames) + self.assertIsNotNone(collector.start_time) + + def test_collect_increments_sample_count(self): + """Test that collect increments total_samples.""" + collector = LiveStatsCollector(1000) + thread_info = MockThreadInfo(123, []) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + + self.assertEqual(collector.total_samples, 0) + collector.collect(stack_frames) + self.assertEqual(collector.total_samples, 1) + collector.collect(stack_frames) + self.assertEqual(collector.total_samples, 2) + + def test_collect_with_frames(self): + """Test collect with actual frame data.""" + collector = LiveStatsCollector(1000) + frames = [MockFrameInfo("test.py", 10, "test_func")] + thread_info = MockThreadInfo(123, frames) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + + collector.collect(stack_frames) + + location = ("test.py", 10, "test_func") + self.assertEqual(collector.result[location]["direct_calls"], 1) + self.assertEqual(collector.successful_samples, 1) + self.assertEqual(collector.failed_samples, 0) + + def test_collect_with_empty_frames(self): + 
"""Test collect with empty frames.""" + collector = LiveStatsCollector(1000) + thread_info = MockThreadInfo(123, []) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + + collector.collect(stack_frames) + + # Empty frames still count as successful since collect() was called successfully + self.assertEqual(collector.successful_samples, 1) + self.assertEqual(collector.failed_samples, 0) + + def test_collect_skip_idle_threads(self): + """Test that idle threads are skipped when skip_idle=True.""" + collector = LiveStatsCollector(1000, skip_idle=True) + + frames = [MockFrameInfo("test.py", 10, "test_func")] + running_thread = MockThreadInfo( + 123, frames, status=THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU + ) + idle_thread = MockThreadInfo(124, frames, status=0) # No flags = idle + interpreter_info = MockInterpreterInfo( + 0, [running_thread, idle_thread] + ) + stack_frames = [interpreter_info] + + collector.collect(stack_frames) + + # Only one thread should be processed + location = ("test.py", 10, "test_func") + self.assertEqual(collector.result[location]["direct_calls"], 1) + + def test_collect_multiple_threads(self): + """Test collect with multiple threads.""" + collector = LiveStatsCollector(1000) + + frames1 = [MockFrameInfo("test1.py", 10, "func1")] + frames2 = [MockFrameInfo("test2.py", 20, "func2")] + thread1 = MockThreadInfo(123, frames1) + thread2 = MockThreadInfo(124, frames2) + interpreter_info = MockInterpreterInfo(0, [thread1, thread2]) + stack_frames = [interpreter_info] + + collector.collect(stack_frames) + + loc1 = ("test1.py", 10, "func1") + loc2 = ("test2.py", 20, "func2") + self.assertEqual(collector.result[loc1]["direct_calls"], 1) + self.assertEqual(collector.result[loc2]["direct_calls"], 1) + + # Check thread IDs are tracked + self.assertIn(123, collector.thread_ids) + self.assertIn(124, collector.thread_ids) + + +class TestLiveStatsCollectorStatisticsBuilding(unittest.TestCase): + """Tests for statistics building and sorting.""" + + def setUp(self): + """Set up test fixtures.""" + self.collector = LiveStatsCollector(1000) + # Add some test data + self.collector.result[("file1.py", 10, "func1")] = { + "direct_calls": 100, + "cumulative_calls": 150, + "total_rec_calls": 0, + } + self.collector.result[("file2.py", 20, "func2")] = { + "direct_calls": 50, + "cumulative_calls": 200, + "total_rec_calls": 0, + } + self.collector.result[("file3.py", 30, "func3")] = { + "direct_calls": 75, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + self.collector.total_samples = 300 + + def test_build_stats_list(self): + """Test that stats list is built correctly.""" + stats_list = self.collector.build_stats_list() + self.assertEqual(len(stats_list), 3) + + # Check that all expected keys are present + for stat in stats_list: + self.assertIn("func", stat) + self.assertIn("direct_calls", stat) + self.assertIn("cumulative_calls", stat) + self.assertIn("total_time", stat) + self.assertIn("cumulative_time", stat) + + def test_sort_by_nsamples(self): + """Test sorting by number of samples.""" + self.collector.sort_by = "nsamples" + stats_list = self.collector.build_stats_list() + + # Should be sorted by direct_calls descending + self.assertEqual(stats_list[0]["func"][2], "func1") # 100 samples + self.assertEqual(stats_list[1]["func"][2], "func3") # 75 samples + self.assertEqual(stats_list[2]["func"][2], "func2") # 50 samples + + def test_sort_by_tottime(self): + """Test sorting by total time.""" + self.collector.sort_by = "tottime" + 
stats_list = self.collector.build_stats_list() + + # Should be sorted by total_time descending + # total_time = direct_calls * sample_interval_sec + self.assertEqual(stats_list[0]["func"][2], "func1") + self.assertEqual(stats_list[1]["func"][2], "func3") + self.assertEqual(stats_list[2]["func"][2], "func2") + + def test_sort_by_cumtime(self): + """Test sorting by cumulative time.""" + self.collector.sort_by = "cumtime" + stats_list = self.collector.build_stats_list() + + # Should be sorted by cumulative_time descending + self.assertEqual(stats_list[0]["func"][2], "func2") # 200 cumulative + self.assertEqual(stats_list[1]["func"][2], "func1") # 150 cumulative + self.assertEqual(stats_list[2]["func"][2], "func3") # 75 cumulative + + def test_sort_by_sample_pct(self): + """Test sorting by sample percentage.""" + self.collector.sort_by = "sample_pct" + stats_list = self.collector.build_stats_list() + + # Should be sorted by percentage of direct_calls + self.assertEqual(stats_list[0]["func"][2], "func1") # 33.3% + self.assertEqual(stats_list[1]["func"][2], "func3") # 25% + self.assertEqual(stats_list[2]["func"][2], "func2") # 16.7% + + def test_sort_by_cumul_pct(self): + """Test sorting by cumulative percentage.""" + self.collector.sort_by = "cumul_pct" + stats_list = self.collector.build_stats_list() + + # Should be sorted by percentage of cumulative_calls + self.assertEqual(stats_list[0]["func"][2], "func2") # 66.7% + self.assertEqual(stats_list[1]["func"][2], "func1") # 50% + self.assertEqual(stats_list[2]["func"][2], "func3") # 25% + + +class TestLiveStatsCollectorSortCycle(unittest.TestCase): + """Tests for sort mode cycling.""" + + def test_cycle_sort_from_nsamples(self): + """Test cycling from nsamples.""" + collector = LiveStatsCollector(1000, sort_by="nsamples") + collector._cycle_sort() + self.assertEqual(collector.sort_by, "sample_pct") + + def test_cycle_sort_from_sample_pct(self): + """Test cycling from sample_pct.""" + collector = LiveStatsCollector(1000, sort_by="sample_pct") + collector._cycle_sort() + self.assertEqual(collector.sort_by, "tottime") + + def test_cycle_sort_from_tottime(self): + """Test cycling from tottime.""" + collector = LiveStatsCollector(1000, sort_by="tottime") + collector._cycle_sort() + self.assertEqual(collector.sort_by, "cumul_pct") + + def test_cycle_sort_from_cumul_pct(self): + """Test cycling from cumul_pct.""" + collector = LiveStatsCollector(1000, sort_by="cumul_pct") + collector._cycle_sort() + self.assertEqual(collector.sort_by, "cumtime") + + def test_cycle_sort_from_cumtime(self): + """Test cycling from cumtime back to nsamples.""" + collector = LiveStatsCollector(1000, sort_by="cumtime") + collector._cycle_sort() + self.assertEqual(collector.sort_by, "nsamples") + + def test_cycle_sort_invalid_mode(self): + """Test cycling from invalid mode resets to nsamples.""" + collector = LiveStatsCollector(1000) + collector.sort_by = "invalid_mode" + collector._cycle_sort() + self.assertEqual(collector.sort_by, "nsamples") + + def test_cycle_sort_backward_from_nsamples(self): + """Test cycling backward from nsamples goes to cumtime.""" + collector = LiveStatsCollector(1000, sort_by="nsamples") + collector._cycle_sort(reverse=True) + self.assertEqual(collector.sort_by, "cumtime") + + def test_cycle_sort_backward_from_cumtime(self): + """Test cycling backward from cumtime goes to cumul_pct.""" + collector = LiveStatsCollector(1000, sort_by="cumtime") + collector._cycle_sort(reverse=True) + self.assertEqual(collector.sort_by, "cumul_pct") + + def 
test_cycle_sort_backward_from_sample_pct(self): + """Test cycling backward from sample_pct goes to nsamples.""" + collector = LiveStatsCollector(1000, sort_by="sample_pct") + collector._cycle_sort(reverse=True) + self.assertEqual(collector.sort_by, "nsamples") + + def test_input_lowercase_s_cycles_forward(self): + """Test that lowercase 's' cycles forward.""" + display = MockDisplay() + collector = LiveStatsCollector( + 1000, sort_by="nsamples", display=display + ) + + display.simulate_input(ord("s")) + collector._handle_input() + + self.assertEqual(collector.sort_by, "sample_pct") + + def test_input_uppercase_s_cycles_backward(self): + """Test that uppercase 'S' cycles backward.""" + display = MockDisplay() + collector = LiveStatsCollector( + 1000, sort_by="nsamples", display=display + ) + + display.simulate_input(ord("S")) + collector._handle_input() + + self.assertEqual(collector.sort_by, "cumtime") + + +class TestLiveStatsCollectorFormatting(unittest.TestCase): + """Tests for formatting methods.""" + + def test_format_uptime_seconds(self): + """Test uptime formatting for seconds only.""" + collector = LiveStatsCollector(1000, display=MockDisplay()) + colors = collector._setup_colors() + collector._initialize_widgets(colors) + self.assertEqual(collector.header_widget.format_uptime(45), "0m45s") + + def test_format_uptime_minutes(self): + """Test uptime formatting for minutes.""" + collector = LiveStatsCollector(1000, display=MockDisplay()) + colors = collector._setup_colors() + collector._initialize_widgets(colors) + self.assertEqual(collector.header_widget.format_uptime(125), "2m05s") + + def test_format_uptime_hours(self): + """Test uptime formatting for hours.""" + collector = LiveStatsCollector(1000, display=MockDisplay()) + colors = collector._setup_colors() + collector._initialize_widgets(colors) + self.assertEqual( + collector.header_widget.format_uptime(3661), "1h01m01s" + ) + + def test_format_uptime_large_values(self): + """Test uptime formatting for large time values.""" + collector = LiveStatsCollector(1000, display=MockDisplay()) + colors = collector._setup_colors() + collector._initialize_widgets(colors) + self.assertEqual( + collector.header_widget.format_uptime(86400), "24h00m00s" + ) + + def test_format_uptime_zero(self): + """Test uptime formatting for zero.""" + collector = LiveStatsCollector(1000, display=MockDisplay()) + colors = collector._setup_colors() + collector._initialize_widgets(colors) + self.assertEqual(collector.header_widget.format_uptime(0), "0m00s") + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py new file mode 100644 index 00000000000..a5870366552 --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_interaction.py @@ -0,0 +1,1238 @@ +"""Interactive controls tests for LiveStatsCollector. + +Tests for interactive controls, filtering, filter input, and thread navigation. 
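+
+Key presses are simulated through MockDisplay.simulate_input(), which queues
+key codes for the collector's _handle_input() to consume on its next call.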
+""" + +import time +import unittest +from test.support import requires +from test.support.import_helper import import_module + +# Only run these tests if curses is available +requires("curses") +curses = import_module("curses") + +from profiling.sampling.live_collector import LiveStatsCollector, MockDisplay +from profiling.sampling.constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, +) +from ._live_collector_helpers import ( + MockFrameInfo, + MockThreadInfo, + MockInterpreterInfo, +) + + +class TestLiveCollectorInteractiveControls(unittest.TestCase): + """Tests for interactive control features.""" + + def setUp(self): + """Set up collector with mock display.""" + self.display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.display + ) + self.collector.start_time = time.perf_counter() + # Set a consistent display update interval for tests + self.collector.display_update_interval = 0.1 + + def tearDown(self): + """Clean up after test.""" + pass + + def test_pause_functionality(self): + """Test pause/resume functionality.""" + self.assertFalse(self.collector.paused) + + # Simulate 'p' key press + self.display.simulate_input(ord("p")) + self.collector._handle_input() + + self.assertTrue(self.collector.paused) + + # Press 'p' again to resume + self.display.simulate_input(ord("p")) + self.collector._handle_input() + + self.assertFalse(self.collector.paused) + + def test_pause_stops_ui_updates(self): + """Test that pausing stops UI updates but profiling continues.""" + # Add some data + self.collector.total_samples = 10 + self.collector.result[("test.py", 1, "func")] = { + "direct_calls": 5, + "cumulative_calls": 10, + "total_rec_calls": 0, + } + + # Pause + self.collector.paused = True + + # Simulate a collect call (profiling continues) + thread_info = MockThreadInfo(123, []) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + + initial_samples = self.collector.total_samples + self.collector.collect(stack_frames) + + # Samples should still increment + self.assertEqual(self.collector.total_samples, initial_samples + 1) + + # But display should not have been updated (buffer stays clear) + self.display.cleared = False + self.collector.collect(stack_frames) + self.assertFalse( + self.display.cleared, "Display should not update when paused" + ) + + def test_reset_stats(self): + """Test reset statistics functionality.""" + # Add some stats + self.collector.total_samples = 100 + self.collector.successful_samples = 90 + self.collector.failed_samples = 10 + self.collector.result[("test.py", 1, "func")] = { + "direct_calls": 50, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + + # Reset + self.collector.reset_stats() + + self.assertEqual(self.collector.total_samples, 0) + self.assertEqual(self.collector.successful_samples, 0) + self.assertEqual(self.collector.failed_samples, 0) + self.assertEqual(len(self.collector.result), 0) + + def test_increase_refresh_rate(self): + """Test increasing refresh rate (faster updates).""" + initial_interval = self.collector.display_update_interval + + # Simulate '+' key press (faster = smaller interval) + self.display.simulate_input(ord("+")) + self.collector._handle_input() + + self.assertLess(self.collector.display_update_interval, initial_interval) + + def test_decrease_refresh_rate(self): + """Test decreasing refresh rate (slower updates).""" + initial_interval = self.collector.display_update_interval + + # Simulate '-' key press (slower = 
larger interval) + self.display.simulate_input(ord("-")) + self.collector._handle_input() + + self.assertGreater(self.collector.display_update_interval, initial_interval) + + def test_refresh_rate_minimum(self): + """Test that refresh rate has a minimum (max speed).""" + self.collector.display_update_interval = 0.05 # Set to minimum + + # Try to go faster + self.display.simulate_input(ord("+")) + self.collector._handle_input() + + # Should stay at minimum + self.assertEqual(self.collector.display_update_interval, 0.05) + + def test_refresh_rate_maximum(self): + """Test that refresh rate has a maximum (min speed).""" + self.collector.display_update_interval = 1.0 # Set to maximum + + # Try to go slower + self.display.simulate_input(ord("-")) + self.collector._handle_input() + + # Should stay at maximum + self.assertEqual(self.collector.display_update_interval, 1.0) + + def test_help_toggle(self): + """Test help screen toggle.""" + self.assertFalse(self.collector.show_help) + + # Show help + self.display.simulate_input(ord("h")) + self.collector._handle_input() + + self.assertTrue(self.collector.show_help) + + # Pressing any key closes help + self.display.simulate_input(ord("x")) + self.collector._handle_input() + + self.assertFalse(self.collector.show_help) + + def test_help_with_question_mark(self): + """Test help screen with '?' key.""" + self.display.simulate_input(ord("?")) + self.collector._handle_input() + + self.assertTrue(self.collector.show_help) + + def test_help_dismiss_with_q_does_not_quit(self): + """Test that pressing 'q' while help is shown only closes help, not quit""" + self.assertFalse(self.collector.show_help) + self.display.simulate_input(ord("h")) + self.collector._handle_input() + self.assertTrue(self.collector.show_help) + + self.display.simulate_input(ord("q")) + self.collector._handle_input() + + self.assertFalse(self.collector.show_help) + self.assertTrue(self.collector.running) + + def test_filter_clear(self): + """Test clearing filter.""" + self.collector.filter_pattern = "test" + + # Clear filter + self.display.simulate_input(ord("c")) + self.collector._handle_input() + + self.assertIsNone(self.collector.filter_pattern) + + def test_filter_clear_when_none(self): + """Test clearing filter when no filter is set.""" + self.assertIsNone(self.collector.filter_pattern) + + # Should not crash + self.display.simulate_input(ord("c")) + self.collector._handle_input() + + self.assertIsNone(self.collector.filter_pattern) + + def test_paused_status_in_footer(self): + """Test that paused status appears in footer.""" + self.collector.total_samples = 10 + self.collector.paused = True + + self.collector._update_display() + + # Check that PAUSED appears in display + self.assertTrue(self.display.contains_text("PAUSED")) + + def test_filter_status_in_footer(self): + """Test that filter status appears in footer.""" + self.collector.total_samples = 10 + self.collector.filter_pattern = "mytest" + + self.collector._update_display() + + # Check that filter info appears + self.assertTrue(self.display.contains_text("Filter")) + + def test_help_screen_display(self): + """Test that help screen is displayed.""" + self.collector.show_help = True + + self.collector._update_display() + + # Check for help content + self.assertTrue(self.display.contains_text("Interactive Commands")) + + def test_pause_uppercase(self): + """Test pause with uppercase 'P' key.""" + self.assertFalse(self.collector.paused) + + self.display.simulate_input(ord("P")) + self.collector._handle_input() + + 
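# Key handling is case-insensitive for pause, so 'P' behaves like 'p'. +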
self.assertTrue(self.collector.paused) + + def test_help_uppercase(self): + """Test help with uppercase 'H' key.""" + self.assertFalse(self.collector.show_help) + + self.display.simulate_input(ord("H")) + self.collector._handle_input() + + self.assertTrue(self.collector.show_help) + + def test_reset_lowercase(self): + """Test reset with lowercase 'r' key.""" + # Add some stats + self.collector.total_samples = 100 + self.collector.result[("test.py", 1, "func")] = { + "direct_calls": 50, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + + self.display.simulate_input(ord("r")) + self.collector._handle_input() + + self.assertEqual(self.collector.total_samples, 0) + self.assertEqual(len(self.collector.result), 0) + + def test_reset_uppercase(self): + """Test reset with uppercase 'R' key.""" + self.collector.total_samples = 100 + + self.display.simulate_input(ord("R")) + self.collector._handle_input() + + self.assertEqual(self.collector.total_samples, 0) + + def test_filter_clear_uppercase(self): + """Test clearing filter with uppercase 'C' key.""" + self.collector.filter_pattern = "test" + + self.display.simulate_input(ord("C")) + self.collector._handle_input() + + self.assertIsNone(self.collector.filter_pattern) + + def test_increase_refresh_rate_with_equals(self): + """Test increasing refresh rate with '=' key.""" + initial_interval = self.collector.display_update_interval + + # Simulate '=' key press (alternative to '+') + self.display.simulate_input(ord("=")) + self.collector._handle_input() + + self.assertLess(self.collector.display_update_interval, initial_interval) + + def test_decrease_refresh_rate_with_underscore(self): + """Test decreasing refresh rate with '_' key.""" + initial_interval = self.collector.display_update_interval + + # Simulate '_' key press (alternative to '-') + self.display.simulate_input(ord("_")) + self.collector._handle_input() + + self.assertGreater(self.collector.display_update_interval, initial_interval) + + def test_finished_state_displays_banner(self): + """Test that finished state shows prominent banner.""" + # Add some sample data + thread_info = MockThreadInfo( + 123, + [ + MockFrameInfo("test.py", 10, "work"), + MockFrameInfo("test.py", 20, "main"), + ], + ) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + self.collector.collect(stack_frames) + + # Mark as finished + self.collector.mark_finished() + + # Check that finished flag is set + self.assertTrue(self.collector.finished) + + # Check that the banner message is displayed + self.assertTrue(self.display.contains_text("PROFILING COMPLETE")) + self.assertTrue(self.display.contains_text("Press 'q' to Quit")) + + def test_finished_state_allows_ui_controls(self): + """Test that finished state allows UI controls but prioritizes quit.""" + self.collector.finished = True + self.collector.running = True + + # Try pressing 's' (sort) - should work and trigger display update + original_sort = self.collector.sort_by + self.display.simulate_input(ord("s")) + self.collector._handle_input() + self.assertTrue(self.collector.running) # Still running + self.assertNotEqual(self.collector.sort_by, original_sort) # Sort changed + + # Try pressing 'p' (pause) - should work + self.display.simulate_input(ord("p")) + self.collector._handle_input() + self.assertTrue(self.collector.running) # Still running + self.assertTrue(self.collector.paused) # Now paused + + # Try pressing 'r' (reset) - should be ignored when finished + self.collector.total_samples = 100 + 
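# reset_stats() is deliberately blocked once mark_finished() has run, so + # the final numbers stay on screen (see test_reset_blocked_when_finished). +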
self.display.simulate_input(ord("r")) + self.collector._handle_input() + self.assertTrue(self.collector.running) # Still running + self.assertEqual(self.collector.total_samples, 100) # NOT reset when finished + + # Press 'q' - should stop + self.display.simulate_input(ord("q")) + self.collector._handle_input() + self.assertFalse(self.collector.running) # Stopped + + def test_finished_state_footer_message(self): + """Test that footer shows appropriate message when finished.""" + # Add some sample data + thread_info = MockThreadInfo( + 123, + [ + MockFrameInfo("test.py", 10, "work"), + MockFrameInfo("test.py", 20, "main"), + ], + ) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + self.collector.collect(stack_frames) + + # Mark as finished + self.collector.mark_finished() + + # Check that footer contains finished message + self.assertTrue(self.display.contains_text("PROFILING FINISHED")) + + def test_finished_state_freezes_time(self): + """Test that time displays are frozen when finished.""" + import time as time_module + + # Set up collector with known start time + self.collector.start_time = time_module.perf_counter() - 10.0 # 10 seconds ago + + # Mark as finished - this should freeze the time + self.collector.mark_finished() + + # Get the frozen elapsed time + frozen_elapsed = self.collector.elapsed_time + frozen_time_display = self.collector.current_time_display + + # Wait a bit to ensure time would advance + time_module.sleep(0.1) + + # Time should remain frozen + self.assertEqual(self.collector.elapsed_time, frozen_elapsed) + self.assertEqual(self.collector.current_time_display, frozen_time_display) + + # Verify finish timestamp was set + self.assertIsNotNone(self.collector.finish_timestamp) + + # Reset should clear the frozen state + self.collector.reset_stats() + self.assertFalse(self.collector.finished) + self.assertIsNone(self.collector.finish_timestamp) + + +class TestLiveCollectorFiltering(unittest.TestCase): + """Tests for filtering functionality.""" + + def setUp(self): + """Set up collector with test data.""" + self.display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.display + ) + self.collector.start_time = time.perf_counter() + self.collector.total_samples = 100 + + # Add test data + self.collector.result[("app/models.py", 10, "save")] = { + "direct_calls": 50, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + self.collector.result[("app/views.py", 20, "render")] = { + "direct_calls": 30, + "cumulative_calls": 40, + "total_rec_calls": 0, + } + self.collector.result[("lib/utils.py", 30, "helper")] = { + "direct_calls": 20, + "cumulative_calls": 25, + "total_rec_calls": 0, + } + + def test_filter_by_filename(self): + """Test filtering by filename pattern.""" + self.collector.filter_pattern = "models" + + stats_list = self.collector.build_stats_list() + + # Only models.py should be included + self.assertEqual(len(stats_list), 1) + self.assertIn("models.py", stats_list[0]["func"][0]) + + def test_filter_by_function_name(self): + """Test filtering by function name.""" + self.collector.filter_pattern = "render" + + stats_list = self.collector.build_stats_list() + + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "render") + + def test_filter_case_insensitive(self): + """Test that filtering is case-insensitive.""" + self.collector.filter_pattern = "MODELS" + + stats_list = self.collector.build_stats_list() + + # Should still match 
models.py + self.assertEqual(len(stats_list), 1) + + def test_filter_substring_matching(self): + """Test substring filtering.""" + self.collector.filter_pattern = "app/" + + stats_list = self.collector.build_stats_list() + + # Should match both app files + self.assertEqual(len(stats_list), 2) + + def test_no_filter(self): + """Test with no filter applied.""" + self.collector.filter_pattern = None + + stats_list = self.collector.build_stats_list() + + # All items should be included + self.assertEqual(len(stats_list), 3) + + def test_filter_partial_function_name(self): + """Test filtering by partial function name.""" + self.collector.filter_pattern = "save" + + stats_list = self.collector.build_stats_list() + + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "save") + + def test_filter_combined_filename_funcname(self): + """Test filtering matches filename:funcname pattern.""" + self.collector.filter_pattern = "views.py:render" + + stats_list = self.collector.build_stats_list() + + # Should match the combined pattern + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "render") + + def test_filter_no_matches(self): + """Test filter that matches nothing.""" + self.collector.filter_pattern = "nonexistent" + + stats_list = self.collector.build_stats_list() + + self.assertEqual(len(stats_list), 0) + + +class TestLiveCollectorFilterInput(unittest.TestCase): + """Tests for filter input mode.""" + + def setUp(self): + """Set up collector with mock display.""" + self.display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.display + ) + self.collector.start_time = time.perf_counter() + + def test_enter_filter_mode(self): + """Test entering filter input mode.""" + self.assertFalse(self.collector.filter_input_mode) + + # Press '/' to enter filter mode + self.display.simulate_input(ord("/")) + self.collector._handle_input() + + self.assertTrue(self.collector.filter_input_mode) + + def test_filter_input_typing(self): + """Test typing characters in filter input mode.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "" + + # Type 't', 'e', 's', 't' + for ch in "test": + self.display.simulate_input(ord(ch)) + self.collector._handle_input() + + self.assertEqual(self.collector.filter_input_buffer, "test") + + def test_filter_input_backspace(self): + """Test backspace in filter input mode.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "test" + + # Press backspace (127) + self.display.simulate_input(127) + self.collector._handle_input() + + self.assertEqual(self.collector.filter_input_buffer, "tes") + + def test_filter_input_backspace_alt(self): + """Test alternative backspace key (263) in filter input mode.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "test" + + # Press backspace (263) + self.display.simulate_input(263) + self.collector._handle_input() + + self.assertEqual(self.collector.filter_input_buffer, "tes") + + def test_filter_input_backspace_empty(self): + """Test backspace on empty buffer.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "" + + # Press backspace - should not crash + self.display.simulate_input(127) + self.collector._handle_input() + + self.assertEqual(self.collector.filter_input_buffer, "") + + def test_filter_input_enter_applies_filter(self): + """Test pressing Enter applies the filter.""" + 
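# Key codes 10 ('\n') and 13 ('\r') are both treated as Enter; the _alt + # variant below covers 13. +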
self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "myfilter" + + # Press Enter (10) + self.display.simulate_input(10) + self.collector._handle_input() + + self.assertFalse(self.collector.filter_input_mode) + self.assertEqual(self.collector.filter_pattern, "myfilter") + self.assertEqual(self.collector.filter_input_buffer, "") + + def test_filter_input_enter_alt(self): + """Test alternative Enter key (13) applies filter.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "myfilter" + + # Press Enter (13) + self.display.simulate_input(13) + self.collector._handle_input() + + self.assertFalse(self.collector.filter_input_mode) + self.assertEqual(self.collector.filter_pattern, "myfilter") + + def test_filter_input_enter_empty_clears_filter(self): + """Test pressing Enter with empty buffer clears filter.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "" + self.collector.filter_pattern = "oldfilter" + + # Press Enter + self.display.simulate_input(10) + self.collector._handle_input() + + self.assertFalse(self.collector.filter_input_mode) + self.assertIsNone(self.collector.filter_pattern) + + def test_filter_input_escape_cancels(self): + """Test pressing ESC cancels filter input.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "newfilter" + self.collector.filter_pattern = "oldfilter" + + # Press ESC (27) + self.display.simulate_input(27) + self.collector._handle_input() + + self.assertFalse(self.collector.filter_input_mode) + self.assertEqual( + self.collector.filter_pattern, "oldfilter" + ) # Unchanged + self.assertEqual(self.collector.filter_input_buffer, "") + + def test_filter_input_start_with_existing_filter(self): + """Test entering filter mode with existing filter pre-fills buffer.""" + self.collector.filter_pattern = "existing" + + # Enter filter mode + self.display.simulate_input(ord("/")) + self.collector._handle_input() + + # Buffer should be pre-filled with existing pattern + self.assertEqual(self.collector.filter_input_buffer, "existing") + + def test_filter_input_start_without_filter(self): + """Test entering filter mode with no existing filter.""" + self.collector.filter_pattern = None + + # Enter filter mode + self.display.simulate_input(ord("/")) + self.collector._handle_input() + + # Buffer should be empty + self.assertEqual(self.collector.filter_input_buffer, "") + + def test_filter_input_mode_blocks_other_commands(self): + """Test that filter input mode blocks other commands.""" + self.collector.filter_input_mode = True + initial_sort = self.collector.sort_by + + # Try to press 's' (sort) - should be captured as input + self.display.simulate_input(ord("s")) + self.collector._handle_input() + + # Sort should not change, 's' should be in buffer + self.assertEqual(self.collector.sort_by, initial_sort) + self.assertEqual(self.collector.filter_input_buffer, "s") + + def test_filter_input_non_printable_ignored(self): + """Test that non-printable characters are ignored.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "test" + + # Try to input a control character (< 32) + self.display.simulate_input(1) # Ctrl-A + self.collector._handle_input() + + # Buffer should be unchanged + self.assertEqual(self.collector.filter_input_buffer, "test") + + def test_filter_input_high_ascii_ignored(self): + """Test that high ASCII characters (>= 127, except backspace) are ignored.""" + self.collector.filter_input_mode = True + 
self.collector.filter_input_buffer = "test" + + # Try to input high ASCII (128) + self.display.simulate_input(128) + self.collector._handle_input() + + # Buffer should be unchanged + self.assertEqual(self.collector.filter_input_buffer, "test") + + def test_filter_prompt_displayed(self): + """Test that filter prompt is displayed when in input mode.""" + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "myfilter" + self.collector.total_samples = 10 + + self.collector._update_display() + + # Should show the filter prompt + self.assertTrue(self.display.contains_text("Function filter")) + self.assertTrue(self.display.contains_text("myfilter")) + + +if __name__ == "__main__": + unittest.main() + + +class TestLiveCollectorThreadNavigation(unittest.TestCase): + """Tests for thread navigation functionality.""" + + def setUp(self): + """Set up collector with mock display and multiple threads.""" + self.mock_display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.mock_display + ) + self.collector.start_time = time.perf_counter() + + # Simulate data from multiple threads + frames1 = [MockFrameInfo("file1.py", 10, "func1")] + frames2 = [MockFrameInfo("file2.py", 20, "func2")] + frames3 = [MockFrameInfo("file3.py", 30, "func3")] + + thread1 = MockThreadInfo(111, frames1) + thread2 = MockThreadInfo(222, frames2) + thread3 = MockThreadInfo(333, frames3) + + interpreter_info = MockInterpreterInfo(0, [thread1, thread2, thread3]) + stack_frames = [interpreter_info] + + # Collect data to populate thread IDs + self.collector.collect(stack_frames) + + def test_initial_view_mode_is_all(self): + """Test that collector starts in ALL mode.""" + self.assertEqual(self.collector.view_mode, "ALL") + self.assertEqual(self.collector.current_thread_index, 0) + + def test_thread_ids_are_tracked(self): + """Test that thread IDs are tracked during collection.""" + self.assertIn(111, self.collector.thread_ids) + self.assertIn(222, self.collector.thread_ids) + self.assertIn(333, self.collector.thread_ids) + self.assertEqual(len(self.collector.thread_ids), 3) + + def test_toggle_to_per_thread_mode(self): + """Test toggling from ALL to PER_THREAD mode with 't' key.""" + self.assertEqual(self.collector.view_mode, "ALL") + + self.mock_display.simulate_input(ord("t")) + self.collector._handle_input() + + self.assertEqual(self.collector.view_mode, "PER_THREAD") + self.assertEqual(self.collector.current_thread_index, 0) + + def test_toggle_back_to_all_mode(self): + """Test toggling back from PER_THREAD to ALL mode.""" + # Switch to PER_THREAD + self.mock_display.simulate_input(ord("t")) + self.collector._handle_input() + self.assertEqual(self.collector.view_mode, "PER_THREAD") + + # Switch back to ALL + self.mock_display.simulate_input(ord("T")) + self.collector._handle_input() + self.assertEqual(self.collector.view_mode, "ALL") + + def test_arrow_right_navigates_threads_in_per_thread_mode(self): + """Test that arrow keys navigate threads in PER_THREAD mode.""" + # Switch to PER_THREAD mode + self.mock_display.simulate_input(ord("t")) + self.collector._handle_input() + + # Navigate forward + self.assertEqual(self.collector.current_thread_index, 0) + + self.mock_display.simulate_input(curses.KEY_RIGHT) + self.collector._handle_input() + self.assertEqual(self.collector.current_thread_index, 1) + + self.mock_display.simulate_input(curses.KEY_RIGHT) + self.collector._handle_input() + self.assertEqual(self.collector.current_thread_index, 2) + + def 
test_arrow_left_navigates_threads_backward(self): + """Test that left arrow navigates threads backward.""" + # Switch to PER_THREAD mode + self.mock_display.simulate_input(ord("t")) + self.collector._handle_input() + + # Navigate backward (should wrap around) + self.mock_display.simulate_input(curses.KEY_LEFT) + self.collector._handle_input() + self.assertEqual( + self.collector.current_thread_index, 2 + ) # Wrapped to last + + self.mock_display.simulate_input(curses.KEY_LEFT) + self.collector._handle_input() + self.assertEqual(self.collector.current_thread_index, 1) + + def test_arrow_down_navigates_like_right(self): + """Test that down arrow works like right arrow.""" + # Switch to PER_THREAD mode + self.mock_display.simulate_input(ord("t")) + self.collector._handle_input() + + self.mock_display.simulate_input(curses.KEY_DOWN) + self.collector._handle_input() + self.assertEqual(self.collector.current_thread_index, 1) + + def test_arrow_up_navigates_like_left(self): + """Test that up arrow works like left arrow.""" + # Switch to PER_THREAD mode + self.mock_display.simulate_input(ord("t")) + self.collector._handle_input() + + self.mock_display.simulate_input(curses.KEY_UP) + self.collector._handle_input() + self.assertEqual(self.collector.current_thread_index, 2) # Wrapped + + def test_arrow_keys_switch_to_per_thread_mode(self): + """Test that arrow keys switch from ALL mode to PER_THREAD mode.""" + self.assertEqual(self.collector.view_mode, "ALL") + + self.mock_display.simulate_input(curses.KEY_RIGHT) + self.collector._handle_input() + self.assertEqual(self.collector.view_mode, "PER_THREAD") + self.assertEqual(self.collector.current_thread_index, 0) + + def test_stats_list_in_all_mode(self): + """Test that stats list uses aggregated data in ALL mode.""" + stats_list = self.collector.build_stats_list() + + # Should have all 3 functions + self.assertEqual(len(stats_list), 3) + func_names = {stat["func"][2] for stat in stats_list} + self.assertEqual(func_names, {"func1", "func2", "func3"}) + + def test_stats_list_in_per_thread_mode(self): + """Test that stats list filters by thread in PER_THREAD mode.""" + # Switch to PER_THREAD mode + self.collector.view_mode = "PER_THREAD" + self.collector.current_thread_index = 0 # First thread (111) + + stats_list = self.collector.build_stats_list() + + # Should only have func1 from thread 111 + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "func1") + + def test_stats_list_switches_with_thread_navigation(self): + """Test that stats list updates when navigating threads.""" + self.collector.view_mode = "PER_THREAD" + + # Thread 0 (111) -> func1 + self.collector.current_thread_index = 0 + stats_list = self.collector.build_stats_list() + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "func1") + + # Thread 1 (222) -> func2 + self.collector.current_thread_index = 1 + stats_list = self.collector.build_stats_list() + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "func2") + + # Thread 2 (333) -> func3 + self.collector.current_thread_index = 2 + stats_list = self.collector.build_stats_list() + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], "func3") + + def test_reset_stats_clears_thread_data(self): + """Test that reset_stats clears thread tracking data.""" + self.assertGreater(len(self.collector.thread_ids), 0) + self.assertGreater(len(self.collector.per_thread_data), 0) + + self.collector.reset_stats() + + 
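# Reset also drops per-thread data and returns the view to ALL mode. +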
self.assertEqual(len(self.collector.thread_ids), 0) + self.assertEqual(len(self.collector.per_thread_data), 0) + self.assertEqual(self.collector.view_mode, "ALL") + self.assertEqual(self.collector.current_thread_index, 0) + + def test_toggle_with_no_threads_stays_in_all_mode(self): + """Test that toggle does nothing when no threads exist.""" + collector = LiveStatsCollector(1000, display=MockDisplay()) + self.assertEqual(len(collector.thread_ids), 0) + + collector.display.simulate_input(ord("t")) + collector._handle_input() + + # Should remain in ALL mode since no threads + self.assertEqual(collector.view_mode, "ALL") + + def test_per_thread_data_isolation(self): + """Test that per-thread data is properly isolated.""" + # Check that each thread has its own isolated data + self.assertIn(111, self.collector.per_thread_data) + self.assertIn(222, self.collector.per_thread_data) + self.assertIn(333, self.collector.per_thread_data) + + # Thread 111 should only have func1 + thread1_funcs = list(self.collector.per_thread_data[111].result.keys()) + self.assertEqual(len(thread1_funcs), 1) + self.assertEqual(thread1_funcs[0][2], "func1") + + # Thread 222 should only have func2 + thread2_funcs = list(self.collector.per_thread_data[222].result.keys()) + self.assertEqual(len(thread2_funcs), 1) + self.assertEqual(thread2_funcs[0][2], "func2") + + def test_aggregated_data_sums_all_threads(self): + """Test that ALL mode shows aggregated data from all threads.""" + # All three functions should be in the aggregated result + self.assertEqual(len(self.collector.result), 3) + + # Each function should have 1 direct call + for func_location, counts in self.collector.result.items(): + self.assertEqual(counts["direct_calls"], 1) + + def test_per_thread_status_tracking(self): + """Test that per-thread status statistics are tracked.""" + # Each thread should have status counts + self.assertIn(111, self.collector.per_thread_data) + self.assertIn(222, self.collector.per_thread_data) + self.assertIn(333, self.collector.per_thread_data) + + # Each thread should have the expected attributes + for thread_id in [111, 222, 333]: + thread_data = self.collector.per_thread_data[thread_id] + self.assertIsNotNone(thread_data.has_gil) + self.assertIsNotNone(thread_data.on_cpu) + self.assertIsNotNone(thread_data.gil_requested) + self.assertIsNotNone(thread_data.unknown) + self.assertIsNotNone(thread_data.total) + # Each thread was sampled once + self.assertEqual(thread_data.total, 1) + + def test_reset_stats_clears_thread_status(self): + """Test that reset_stats clears per-thread status data.""" + self.assertGreater(len(self.collector.per_thread_data), 0) + + self.collector.reset_stats() + + self.assertEqual(len(self.collector.per_thread_data), 0) + + def test_per_thread_sample_counts(self): + """Test that per-thread sample counts are tracked correctly.""" + # Each thread should have exactly 1 sample (we collected once) + for thread_id in [111, 222, 333]: + self.assertIn(thread_id, self.collector.per_thread_data) + self.assertEqual(self.collector.per_thread_data[thread_id].sample_count, 1) + + def test_per_thread_gc_samples(self): + """Test that per-thread GC samples are tracked correctly.""" + # Initially no threads have GC frames + for thread_id in [111, 222, 333]: + self.assertIn(thread_id, self.collector.per_thread_data) + self.assertEqual( + self.collector.per_thread_data[thread_id].gc_frame_samples, 0 + ) + + # Now collect a sample with a GC frame in thread 222 + gc_frames = [MockFrameInfo("gc.py", 100, "gc_collect")] + 
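# The mock frame named "gc_collect" stands in for a real GC frame here. +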
thread_with_gc = MockThreadInfo(222, gc_frames) + interpreter_info = MockInterpreterInfo(0, [thread_with_gc]) + stack_frames = [interpreter_info] + + self.collector.collect(stack_frames) + + # Thread 222 should now have 1 GC sample + self.assertEqual(self.collector.per_thread_data[222].gc_frame_samples, 1) + # Other threads should still have 0 + self.assertEqual(self.collector.per_thread_data[111].gc_frame_samples, 0) + self.assertEqual(self.collector.per_thread_data[333].gc_frame_samples, 0) + + def test_only_threads_with_frames_are_tracked(self): + """Test that only threads with actual frame data are added to thread_ids.""" + # Create a new collector + collector = LiveStatsCollector(1000, display=MockDisplay()) + + # Create threads: one with frames, one without + frames = [MockFrameInfo("test.py", 10, "test_func")] + thread_with_frames = MockThreadInfo(111, frames) + thread_without_frames = MockThreadInfo(222, None) # No frames + interpreter_info = MockInterpreterInfo( + 0, [thread_with_frames, thread_without_frames] + ) + stack_frames = [interpreter_info] + + collector.collect(stack_frames) + + # Only thread 111 should be tracked (it has frames) + self.assertIn(111, collector.thread_ids) + self.assertNotIn(222, collector.thread_ids) + + def test_per_thread_status_isolation(self): + """Test that per-thread status counts are isolated per thread.""" + # Create threads with different status flags + + frames1 = [MockFrameInfo("file1.py", 10, "func1")] + frames2 = [MockFrameInfo("file2.py", 20, "func2")] + + # Thread 444: has GIL but not on CPU + thread1 = MockThreadInfo(444, frames1, status=THREAD_STATUS_HAS_GIL) + # Thread 555: on CPU but not has GIL + thread2 = MockThreadInfo(555, frames2, status=THREAD_STATUS_ON_CPU) + + interpreter_info = MockInterpreterInfo(0, [thread1, thread2]) + stack_frames = [interpreter_info] + + collector = LiveStatsCollector(1000, display=MockDisplay()) + collector.collect(stack_frames) + + # Check thread 444 status + self.assertEqual(collector.per_thread_data[444].has_gil, 1) + self.assertEqual(collector.per_thread_data[444].on_cpu, 0) + + # Check thread 555 status + self.assertEqual(collector.per_thread_data[555].has_gil, 0) + self.assertEqual(collector.per_thread_data[555].on_cpu, 1) + + def test_display_uses_per_thread_stats_in_per_thread_mode(self): + """Test that display widget uses per-thread stats when in PER_THREAD mode.""" + + # Create collector with mock display + collector = LiveStatsCollector(1000, display=MockDisplay()) + collector.start_time = time.perf_counter() + + # Create 2 threads with different characteristics + # Thread 111: always has GIL (10 samples) + # Thread 222: never has GIL (10 samples) + for _ in range(10): + frames1 = [MockFrameInfo("file1.py", 10, "func1")] + frames2 = [MockFrameInfo("file2.py", 20, "func2")] + thread1 = MockThreadInfo( + 111, frames1, status=THREAD_STATUS_HAS_GIL + ) + thread2 = MockThreadInfo(222, frames2, status=0) # No flags + interpreter_info = MockInterpreterInfo(0, [thread1, thread2]) + collector.collect([interpreter_info]) + + # In ALL mode, should show mixed stats (50% on GIL, 50% off GIL) + self.assertEqual(collector.view_mode, "ALL") + total_has_gil = collector.thread_status_counts["has_gil"] + total_threads = collector.thread_status_counts["total"] + self.assertEqual(total_has_gil, 10) # Only thread 111 has GIL + self.assertEqual(total_threads, 20) # 10 samples * 2 threads + + # Switch to PER_THREAD mode and select thread 111 + collector.view_mode = "PER_THREAD" + collector.current_thread_index = 0 # 
Thread 111 + + # Thread 111 should show 100% on GIL + thread_111_data = collector.per_thread_data[111] + self.assertEqual(thread_111_data.has_gil, 10) + self.assertEqual(thread_111_data.total, 10) + + # Switch to thread 222 + collector.current_thread_index = 1 # Thread 222 + + # Thread 222 should show 0% on GIL + thread_222_data = collector.per_thread_data[222] + self.assertEqual(thread_222_data.has_gil, 0) + self.assertEqual(thread_222_data.total, 10) + + def test_display_uses_per_thread_gc_stats_in_per_thread_mode(self): + """Test that GC percentage uses per-thread data in PER_THREAD mode.""" + # Create collector with mock display + collector = LiveStatsCollector(1000, display=MockDisplay()) + collector.start_time = time.perf_counter() + + # Thread 111: 5 samples, 2 with GC + # Thread 222: 5 samples, 0 with GC + for i in range(5): + if i < 2: + # First 2 samples for thread 111 have GC + frames1 = [MockFrameInfo("gc.py", 100, "gc_collect")] + else: + frames1 = [MockFrameInfo("file1.py", 10, "func1")] + + frames2 = [MockFrameInfo("file2.py", 20, "func2")] # No GC + + thread1 = MockThreadInfo(111, frames1) + thread2 = MockThreadInfo(222, frames2) + interpreter_info = MockInterpreterInfo(0, [thread1, thread2]) + collector.collect([interpreter_info]) + + # Check aggregated GC stats (ALL mode) + # 2 GC samples out of 10 total = 20% + self.assertEqual(collector.gc_frame_samples, 2) + self.assertEqual(collector.total_samples, 5) # 5 collect() calls + + # Check per-thread GC stats + # Thread 111: 2 GC samples out of 5 = 40% + self.assertEqual(collector.per_thread_data[111].gc_frame_samples, 2) + self.assertEqual(collector.per_thread_data[111].sample_count, 5) + + # Thread 222: 0 GC samples out of 5 = 0% + self.assertEqual(collector.per_thread_data[222].gc_frame_samples, 0) + self.assertEqual(collector.per_thread_data[222].sample_count, 5) + + # Now verify the display would use the correct stats + collector.view_mode = "PER_THREAD" + + # For thread 111 + collector.current_thread_index = 0 + thread_id = collector.thread_ids[0] + self.assertEqual(thread_id, 111) + thread_gc_pct = ( + collector.per_thread_data[111].gc_frame_samples + / collector.per_thread_data[111].sample_count + ) * 100 + self.assertEqual(thread_gc_pct, 40.0) + + # For thread 222 + collector.current_thread_index = 1 + thread_id = collector.thread_ids[1] + self.assertEqual(thread_id, 222) + thread_gc_pct = ( + collector.per_thread_data[222].gc_frame_samples + / collector.per_thread_data[222].sample_count + ) * 100 + self.assertEqual(thread_gc_pct, 0.0) + + def test_function_counts_are_per_thread_in_per_thread_mode(self): + """Test that function counts (total/exec/stack) are per-thread in PER_THREAD mode.""" + # Create collector with mock display + collector = LiveStatsCollector(1000, display=MockDisplay()) + collector.start_time = time.perf_counter() + + # Thread 111: calls func1, func2, func3 (3 functions) + # Thread 222: calls func4, func5 (2 functions) + frames1 = [ + MockFrameInfo("file1.py", 10, "func1"), + MockFrameInfo("file1.py", 20, "func2"), + MockFrameInfo("file1.py", 30, "func3"), + ] + frames2 = [ + MockFrameInfo("file2.py", 40, "func4"), + MockFrameInfo("file2.py", 50, "func5"), + ] + + thread1 = MockThreadInfo(111, frames1) + thread2 = MockThreadInfo(222, frames2) + interpreter_info = MockInterpreterInfo(0, [thread1, thread2]) + collector.collect([interpreter_info]) + + # In ALL mode, should have 5 total functions + self.assertEqual(len(collector.result), 5) + + # In PER_THREAD mode for thread 111, should have 3 
functions + collector.view_mode = "PER_THREAD" + collector.current_thread_index = 0 # Thread 111 + thread_111_result = collector.per_thread_data[111].result + self.assertEqual(len(thread_111_result), 3) + + # Verify the functions are the right ones + thread_111_funcs = {loc[2] for loc in thread_111_result.keys()} + self.assertEqual(thread_111_funcs, {"func1", "func2", "func3"}) + + # In PER_THREAD mode for thread 222, should have 2 functions + collector.current_thread_index = 1 # Thread 222 + thread_222_result = collector.per_thread_data[222].result + self.assertEqual(len(thread_222_result), 2) + + # Verify the functions are the right ones + thread_222_funcs = {loc[2] for loc in thread_222_result.keys()} + self.assertEqual(thread_222_funcs, {"func4", "func5"}) + + +class TestLiveCollectorNewFeatures(unittest.TestCase): + """Tests for new features added to live collector.""" + + def setUp(self): + """Set up test fixtures.""" + self.display = MockDisplay() + self.collector = LiveStatsCollector(1000, display=self.display) + self.collector.start_time = time.perf_counter() + + def test_filter_input_takes_precedence_over_commands(self): + """Test that filter input mode blocks command keys like 'h' and 'p'.""" + # Enter filter input mode + self.collector.filter_input_mode = True + self.collector.filter_input_buffer = "" + + # Press 'h' - should add to filter buffer, not show help + self.display.simulate_input(ord("h")) + self.collector._handle_input() + + self.assertFalse(self.collector.show_help) # Help not triggered + self.assertEqual(self.collector.filter_input_buffer, "h") # Added to filter + self.assertTrue(self.collector.filter_input_mode) # Still in filter mode + + def test_reset_blocked_when_finished(self): + """Test that reset command is blocked when profiling is finished.""" + # Set up some sample data and mark as finished + self.collector.total_samples = 100 + self.collector.finished = True + + # Press 'r' for reset + self.display.simulate_input(ord("r")) + self.collector._handle_input() + + # Should NOT have been reset + self.assertEqual(self.collector.total_samples, 100) + self.assertTrue(self.collector.finished) + + def test_time_display_fix_when_finished(self): + """Test that time display shows correct frozen time when finished.""" + import time as time_module + + # Mark as finished to freeze time + self.collector.mark_finished() + + # Should have set both timestamps correctly + self.assertIsNotNone(self.collector.finish_timestamp) + self.assertIsNotNone(self.collector.finish_wall_time) + + # Get the frozen time display + frozen_time = self.collector.current_time_display + + # Wait a bit + time_module.sleep(0.1) + + # Should still show the same frozen time (not jump to wrong time) + self.assertEqual(self.collector.current_time_display, frozen_time) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py new file mode 100644 index 00000000000..b5a387fa3a3 --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_live_collector_ui.py @@ -0,0 +1,819 @@ +"""UI and display tests for LiveStatsCollector. + +Tests for MockDisplay, curses integration, display methods, +edge cases, update display, and display helpers. 
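+
+MockDisplay records each write in a (line, col) -> (text, attr) buffer so
+tests can assert on rendered output without a real curses screen.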
+""" + +import sys +import time +import unittest +from unittest import mock +from test.support import requires +from test.support.import_helper import import_module + +# Only run these tests if curses is available +requires("curses") +curses = import_module("curses") + +from profiling.sampling.live_collector import LiveStatsCollector, MockDisplay +from ._live_collector_helpers import ( + MockThreadInfo, + MockInterpreterInfo, +) + + +class TestLiveStatsCollectorWithMockDisplay(unittest.TestCase): + """Tests for display functionality using MockDisplay.""" + + def setUp(self): + """Set up collector with mock display.""" + self.mock_display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.mock_display + ) + self.collector.start_time = time.perf_counter() + + def test_update_display_with_mock(self): + """Test that update_display works with MockDisplay.""" + self.collector.total_samples = 100 + self.collector.result[("test.py", 10, "test_func")] = { + "direct_calls": 50, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + + self.collector._update_display() + + # Verify display operations were called + self.assertTrue(self.mock_display.cleared) + self.assertTrue(self.mock_display.refreshed) + self.assertTrue(self.mock_display.redrawn) + + # Verify some content was written + self.assertGreater(len(self.mock_display.buffer), 0) + + def test_handle_input_quit(self): + """Test that 'q' input stops the collector.""" + self.mock_display.simulate_input(ord("q")) + self.collector._handle_input() + self.assertFalse(self.collector.running) + + def test_handle_input_sort_cycle(self): + """Test that 's' input cycles sort mode.""" + self.collector.sort_by = "tottime" + self.mock_display.simulate_input(ord("s")) + self.collector._handle_input() + self.assertEqual(self.collector.sort_by, "cumul_pct") + + def test_draw_methods_with_mock_display(self): + """Test that draw methods write to mock display.""" + self.collector.total_samples = 500 + self.collector.successful_samples = 450 + self.collector.failed_samples = 50 + + colors = self.collector._setup_colors() + self.collector._initialize_widgets(colors) + + # Test individual widget methods + line = self.collector.header_widget.draw_header_info(0, 160, 100.5) + self.assertEqual(line, 2) # Title + header info line + self.assertGreater(len(self.mock_display.buffer), 0) + + # Clear buffer and test next method + self.mock_display.buffer.clear() + line = self.collector.header_widget.draw_sample_stats(0, 160, 10.0) + self.assertEqual(line, 1) + self.assertGreater(len(self.mock_display.buffer), 0) + + def test_terminal_too_small_message(self): + """Test terminal too small warning.""" + small_display = MockDisplay(height=10, width=50) + self.collector.display = small_display + + self.collector._show_terminal_too_small(10, 50) + + # Should have written warning message + text = small_display.get_text_at(3, 15) # Approximate center + self.assertIsNotNone(text) + + def test_full_display_rendering_with_data(self): + """Test complete display rendering with realistic data.""" + # Add multiple functions with different call counts + self.collector.total_samples = 1000 + self.collector.successful_samples = 950 + self.collector.failed_samples = 50 + + self.collector.result[("app.py", 10, "main")] = { + "direct_calls": 100, + "cumulative_calls": 500, + "total_rec_calls": 0, + } + self.collector.result[("utils.py", 20, "helper")] = { + "direct_calls": 300, + "cumulative_calls": 400, + "total_rec_calls": 0, + } + 
self.collector.result[("db.py", 30, "query")] = { + "direct_calls": 50, + "cumulative_calls": 100, + "total_rec_calls": 0, + } + + self.collector._update_display() + + # Verify the display has content + self.assertGreater(len(self.mock_display.buffer), 10) + + # Verify PID is shown + found_pid = False + for (line, col), (text, attr) in self.mock_display.buffer.items(): + if "12345" in text: + found_pid = True + break + self.assertTrue(found_pid, "PID should be displayed") + + def test_efficiency_bar_visualization(self): + """Test that efficiency bar shows correct proportions.""" + self.collector.total_samples = 100 + self.collector.successful_samples = 75 + self.collector.failed_samples = 25 + + colors = self.collector._setup_colors() + self.collector._initialize_widgets(colors) + self.collector.header_widget.draw_efficiency_bar(0, 160) + + # Check that something was drawn to the display + self.assertGreater(len(self.mock_display.buffer), 0) + + def test_stats_display_with_different_sort_modes(self): + """Test that stats are displayed correctly with different sort modes.""" + self.collector.total_samples = 100 + self.collector.result[("a.py", 1, "func_a")] = { + "direct_calls": 10, + "cumulative_calls": 20, + "total_rec_calls": 0, + } + self.collector.result[("b.py", 2, "func_b")] = { + "direct_calls": 30, + "cumulative_calls": 40, + "total_rec_calls": 0, + } + + # Test each sort mode + for sort_mode in [ + "nsamples", + "tottime", + "cumtime", + "sample_pct", + "cumul_pct", + ]: + self.mock_display.buffer.clear() + self.collector.sort_by = sort_mode + + stats_list = self.collector.build_stats_list() + self.assertEqual(len(stats_list), 2) + + # Verify sorting worked (func_b should be first for most modes) + if sort_mode in ["nsamples", "tottime", "sample_pct"]: + self.assertEqual(stats_list[0]["func"][2], "func_b") + + def test_narrow_terminal_column_hiding(self): + """Test that columns are hidden on narrow terminals.""" + narrow_display = MockDisplay(height=40, width=70) + collector = LiveStatsCollector(1000, pid=12345, display=narrow_display) + collector.start_time = time.perf_counter() + + colors = collector._setup_colors() + collector._initialize_widgets(colors) + line, show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + collector.table_widget.draw_column_headers(0, 70) + ) + + # On narrow terminal, some columns should be hidden + self.assertFalse( + show_cumul_pct or show_cumtime, + "Some columns should be hidden on narrow terminal", + ) + + def test_very_narrow_terminal_minimal_columns(self): + """Test minimal display on very narrow terminal.""" + very_narrow = MockDisplay(height=40, width=60) + collector = LiveStatsCollector(1000, pid=12345, display=very_narrow) + collector.start_time = time.perf_counter() + + colors = collector._setup_colors() + collector._initialize_widgets(colors) + line, show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + collector.table_widget.draw_column_headers(0, 60) + ) + + # Very narrow should hide even more columns + self.assertFalse( + show_sample_pct, + "Sample % should be hidden on very narrow terminal", + ) + + def test_display_updates_only_at_interval(self): + """Test that display updates respect the update interval.""" + # Create collector with display + collector = LiveStatsCollector(1000, display=self.mock_display) + + # Simulate multiple rapid collections + thread_info = MockThreadInfo(123, []) + interpreter_info = MockInterpreterInfo(0, [thread_info]) + stack_frames = [interpreter_info] + + # First collect should 
update display + collector.collect(stack_frames) + first_cleared = self.mock_display.cleared + + # Reset flags + self.mock_display.cleared = False + self.mock_display.refreshed = False + + # Immediate second collect should NOT update display (too soon) + collector.collect(stack_frames) + self.assertFalse( + self.mock_display.cleared, + "Display should not update too frequently", + ) + + def test_top_functions_display(self): + """Test that top functions are highlighted correctly.""" + self.collector.total_samples = 1000 + + # Create functions with different sample counts + for i in range(10): + self.collector.result[(f"file{i}.py", i * 10, f"func{i}")] = { + "direct_calls": (10 - i) * 10, # Decreasing counts + "cumulative_calls": (10 - i) * 20, + "total_rec_calls": 0, + } + + colors = self.collector._setup_colors() + self.collector._initialize_widgets(colors) + stats_list = self.collector.build_stats_list() + + self.collector.header_widget.draw_top_functions(0, 160, stats_list) + + # Top functions section should have written something + self.assertGreater(len(self.mock_display.buffer), 0) + + +class TestLiveStatsCollectorCursesIntegration(unittest.TestCase): + """Tests for curses-related functionality using mocks.""" + + def setUp(self): + """Set up mock curses screen.""" + self.mock_stdscr = mock.MagicMock() + self.mock_stdscr.getmaxyx.return_value = (40, 160) # height, width + self.mock_stdscr.getch.return_value = -1 # No input + # Save original stdout/stderr + self._orig_stdout = sys.stdout + self._orig_stderr = sys.stderr + + def tearDown(self): + """Restore stdout/stderr if changed.""" + sys.stdout = self._orig_stdout + sys.stderr = self._orig_stderr + + def test_init_curses(self): + """Test curses initialization.""" + collector = LiveStatsCollector(1000) + + with ( + mock.patch("curses.curs_set"), + mock.patch("curses.has_colors", return_value=True), + mock.patch("curses.start_color"), + mock.patch("curses.use_default_colors"), + mock.patch("builtins.open", mock.mock_open()) as mock_open_func, + ): + collector.init_curses(self.mock_stdscr) + + self.assertIsNotNone(collector.stdscr) + self.mock_stdscr.nodelay.assert_called_with(True) + self.mock_stdscr.scrollok.assert_called_with(False) + + # Clean up properly + if collector._devnull: + collector._devnull.close() + collector._saved_stdout = None + collector._saved_stderr = None + + def test_cleanup_curses(self): + """Test curses cleanup.""" + mock_display = MockDisplay() + collector = LiveStatsCollector(1000, display=mock_display) + collector.stdscr = self.mock_stdscr + + # Mock devnull file to avoid resource warnings + mock_devnull = mock.MagicMock() + mock_saved_stdout = mock.MagicMock() + mock_saved_stderr = mock.MagicMock() + + collector._devnull = mock_devnull + collector._saved_stdout = mock_saved_stdout + collector._saved_stderr = mock_saved_stderr + + with mock.patch("curses.curs_set"): + collector.cleanup_curses() + + mock_devnull.close.assert_called_once() + # Verify stdout/stderr were set back to the saved values + self.assertEqual(sys.stdout, mock_saved_stdout) + self.assertEqual(sys.stderr, mock_saved_stderr) + # Verify the saved values were cleared + self.assertIsNone(collector._saved_stdout) + self.assertIsNone(collector._saved_stderr) + self.assertIsNone(collector._devnull) + + def test_add_str_with_mock_display(self): + """Test safe_addstr with MockDisplay.""" + mock_display = MockDisplay(height=40, width=160) + collector = LiveStatsCollector(1000, display=mock_display) + colors = collector._setup_colors() + 
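# The widgets draw through the injected display, so writes land in the mock buffer. +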
collector._initialize_widgets(colors) + + collector.header_widget.add_str(5, 10, "Test", 0) + # Verify it was added to the buffer + self.assertIn((5, 10), mock_display.buffer) + + def test_setup_colors_with_color_support(self): + """Test color setup when colors are supported.""" + mock_display = MockDisplay(height=40, width=160) + mock_display.colors_supported = True + collector = LiveStatsCollector(1000, display=mock_display) + + colors = collector._setup_colors() + + self.assertIn("header", colors) + self.assertIn("cyan", colors) + self.assertIn("yellow", colors) + self.assertIn("green", colors) + self.assertIn("magenta", colors) + self.assertIn("red", colors) + + def test_setup_colors_without_color_support(self): + """Test color setup when colors are not supported.""" + mock_display = MockDisplay(height=40, width=160) + mock_display.colors_supported = False + collector = LiveStatsCollector(1000, display=mock_display) + + colors = collector._setup_colors() + + # Should still have all keys but with fallback values + self.assertIn("header", colors) + self.assertIn("cyan", colors) + + def test_handle_input_quit(self): + """Test handling 'q' key to quit.""" + mock_display = MockDisplay() + mock_display.simulate_input(ord("q")) + collector = LiveStatsCollector(1000, display=mock_display) + + self.assertTrue(collector.running) + collector._handle_input() + self.assertFalse(collector.running) + + def test_handle_input_quit_uppercase(self): + """Test handling 'Q' key to quit.""" + mock_display = MockDisplay() + mock_display.simulate_input(ord("Q")) + collector = LiveStatsCollector(1000, display=mock_display) + + self.assertTrue(collector.running) + collector._handle_input() + self.assertFalse(collector.running) + + def test_handle_input_cycle_sort(self): + """Test handling 's' key to cycle sort.""" + mock_display = MockDisplay() + mock_display.simulate_input(ord("s")) + collector = LiveStatsCollector( + 1000, sort_by="nsamples", display=mock_display + ) + + collector._handle_input() + self.assertEqual(collector.sort_by, "sample_pct") + + def test_handle_input_cycle_sort_uppercase(self): + """Test handling 'S' key to cycle sort backward.""" + mock_display = MockDisplay() + mock_display.simulate_input(ord("S")) + collector = LiveStatsCollector( + 1000, sort_by="nsamples", display=mock_display + ) + + collector._handle_input() + self.assertEqual(collector.sort_by, "cumtime") + + def test_handle_input_no_key(self): + """Test handling when no key is pressed.""" + mock_display = MockDisplay() + collector = LiveStatsCollector(1000, display=mock_display) + + collector._handle_input() + # Should not change state + self.assertTrue(collector.running) + + +class TestLiveStatsCollectorDisplayMethods(unittest.TestCase): + """Tests for display-related methods.""" + + def setUp(self): + """Set up collector with mock display.""" + self.mock_display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.mock_display + ) + self.collector.start_time = time.perf_counter() + + def test_show_terminal_too_small(self): + """Test terminal too small message display.""" + self.collector._show_terminal_too_small(10, 50) + # Should have written some content to the display buffer + self.assertGreater(len(self.mock_display.buffer), 0) + + def test_draw_header_info(self): + """Test drawing header information.""" + colors = { + "cyan": curses.A_BOLD, + "green": curses.A_BOLD, + "yellow": curses.A_BOLD, + "magenta": curses.A_BOLD, + } + self.collector._initialize_widgets(colors) 
+ + line = self.collector.header_widget.draw_header_info(0, 160, 100.5) + self.assertEqual(line, 2) # Title + header info line + + def test_draw_sample_stats(self): + """Test drawing sample statistics.""" + self.collector.total_samples = 1000 + colors = {"cyan": curses.A_BOLD, "green": curses.A_BOLD} + self.collector._initialize_widgets(colors) + + line = self.collector.header_widget.draw_sample_stats(0, 160, 10.0) + self.assertEqual(line, 1) + self.assertGreater(self.collector.max_sample_rate, 0) + + def test_progress_bar_uses_target_rate(self): + """Test that progress bar uses target rate instead of max rate.""" + # Set up collector with specific sampling interval + collector = LiveStatsCollector( + 10000, pid=12345, display=self.mock_display + ) # 10ms = 100Hz target + collector.start_time = time.perf_counter() + collector.total_samples = 500 + collector.max_sample_rate = ( + 150 # Higher than target to test we don't use this + ) + + colors = {"cyan": curses.A_BOLD, "green": curses.A_BOLD} + collector._initialize_widgets(colors) + + # Clear the display buffer to capture only our progress bar content + self.mock_display.buffer.clear() + + # Draw sample stats with a known elapsed time that gives us a specific sample rate + elapsed = 10.0 # 500 samples in 10 seconds = 50 samples/second + line = collector.header_widget.draw_sample_stats(0, 160, elapsed) + + # Verify display was updated + self.assertEqual(line, 1) + self.assertGreater(len(self.mock_display.buffer), 0) + + # Verify the label shows current/target format with units instead of "max" + found_current_target_label = False + found_max_label = False + for (line_num, col), (text, attr) in self.mock_display.buffer.items(): + # Should show "50.0Hz/100.0Hz (50.0%)" since we're at 50% of target (50/100) + if "50.0Hz/100.0Hz" in text and "50.0%" in text: + found_current_target_label = True + if "max:" in text: + found_max_label = True + + self.assertTrue( + found_current_target_label, + "Should display current/target rate with percentage", + ) + self.assertFalse(found_max_label, "Should not display max rate label") + + def test_progress_bar_different_intervals(self): + """Test that progress bar adapts to different sampling intervals.""" + test_cases = [ + ( + 1000, + "1.0KHz", + "100.0Hz", + ), # 1ms interval -> 1000Hz target (1.0KHz), 100Hz current + ( + 5000, + "200.0Hz", + "100.0Hz", + ), # 5ms interval -> 200Hz target, 100Hz current + ( + 20000, + "50.0Hz", + "100.0Hz", + ), # 20ms interval -> 50Hz target, 100Hz current + ( + 100000, + "10.0Hz", + "100.0Hz", + ), # 100ms interval -> 10Hz target, 100Hz current + ] + + for ( + interval_usec, + expected_target_formatted, + expected_current_formatted, + ) in test_cases: + with self.subTest(interval=interval_usec): + collector = LiveStatsCollector( + interval_usec, display=MockDisplay() + ) + collector.start_time = time.perf_counter() + collector.total_samples = 100 + + colors = {"cyan": curses.A_BOLD, "green": curses.A_BOLD} + collector._initialize_widgets(colors) + + # Clear buffer + collector.display.buffer.clear() + + # Draw with 1 second elapsed time (gives us current rate of 100Hz) + collector.header_widget.draw_sample_stats(0, 160, 1.0) + + # Check that the current/target format appears in the display with proper units + found_current_target_format = False + for (line_num, col), ( + text, + attr, + ) in collector.display.buffer.items(): + # Looking for format like "100.0Hz/1.0KHz" or "100.0Hz/200.0Hz" + expected_format = f"{expected_current_formatted}/{expected_target_formatted}" + 
if expected_format in text and "%" in text: + found_current_target_format = True + break + + self.assertTrue( + found_current_target_format, + f"Should display current/target rate format with units for {interval_usec}µs interval", + ) + + def test_draw_efficiency_bar(self): + """Test drawing efficiency bar.""" + self.collector.successful_samples = 900 + self.collector.failed_samples = 100 + self.collector.total_samples = 1000 + colors = {"green": curses.A_BOLD, "red": curses.A_BOLD} + self.collector._initialize_widgets(colors) + + line = self.collector.header_widget.draw_efficiency_bar(0, 160) + self.assertEqual(line, 1) + + def test_draw_function_stats(self): + """Test drawing function statistics.""" + self.collector.result[("test.py", 10, "func1")] = { + "direct_calls": 100, + "cumulative_calls": 150, + "total_rec_calls": 0, + } + self.collector.result[("test.py", 20, "func2")] = { + "direct_calls": 0, + "cumulative_calls": 50, + "total_rec_calls": 0, + } + + stats_list = self.collector.build_stats_list() + colors = { + "cyan": curses.A_BOLD, + "green": curses.A_BOLD, + "yellow": curses.A_BOLD, + "magenta": curses.A_BOLD, + } + self.collector._initialize_widgets(colors) + + line = self.collector.header_widget.draw_function_stats( + 0, 160, stats_list + ) + self.assertEqual(line, 1) + + def test_draw_top_functions(self): + """Test drawing top functions.""" + self.collector.total_samples = 300 + self.collector.result[("test.py", 10, "hot_func")] = { + "direct_calls": 100, + "cumulative_calls": 150, + "total_rec_calls": 0, + } + + stats_list = self.collector.build_stats_list() + colors = { + "red": curses.A_BOLD, + "yellow": curses.A_BOLD, + "green": curses.A_BOLD, + } + self.collector._initialize_widgets(colors) + + line = self.collector.header_widget.draw_top_functions( + 0, 160, stats_list + ) + self.assertEqual(line, 1) + + def test_draw_column_headers(self): + """Test drawing column headers.""" + colors = { + "sorted_header": curses.A_BOLD, + "normal_header": curses.A_NORMAL, + } + self.collector._initialize_widgets(colors) + + ( + line, + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) = self.collector.table_widget.draw_column_headers(0, 160) + self.assertEqual(line, 1) + self.assertTrue(show_sample_pct) + self.assertTrue(show_tottime) + self.assertTrue(show_cumul_pct) + self.assertTrue(show_cumtime) + + def test_draw_column_headers_narrow_terminal(self): + """Test column headers adapt to narrow terminal.""" + colors = { + "sorted_header": curses.A_BOLD, + "normal_header": curses.A_NORMAL, + } + self.collector._initialize_widgets(colors) + + ( + line, + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) = self.collector.table_widget.draw_column_headers(0, 70) + self.assertEqual(line, 1) + # Some columns should be hidden on narrow terminal + self.assertFalse(show_cumul_pct) + + def test_draw_footer(self): + """Test drawing footer.""" + colors = self.collector._setup_colors() + self.collector._initialize_widgets(colors) + self.collector.footer_widget.render(38, 160) + # Should have written some content to the display buffer + self.assertGreater(len(self.mock_display.buffer), 0) + + def test_draw_progress_bar(self): + """Test progress bar drawing.""" + colors = self.collector._setup_colors() + self.collector._initialize_widgets(colors) + bar, length = self.collector.header_widget.progress_bar.render_bar( + 50, 100, 30 + ) + + self.assertIn("[", bar) + self.assertIn("]", bar) + self.assertGreater(length, 0) + # Should be roughly 50% filled + 
self.assertIn("█", bar) + self.assertIn("░", bar) + + +class TestLiveStatsCollectorEdgeCases(unittest.TestCase): + """Tests for edge cases and error handling.""" + + def test_very_long_function_name(self): + """Test handling of very long function names.""" + collector = LiveStatsCollector(1000) + long_name = "x" * 200 + collector.result[("test.py", 10, long_name)] = { + "direct_calls": 10, + "cumulative_calls": 20, + "total_rec_calls": 0, + } + + stats_list = collector.build_stats_list() + self.assertEqual(len(stats_list), 1) + self.assertEqual(stats_list[0]["func"][2], long_name) + + +class TestLiveStatsCollectorUpdateDisplay(unittest.TestCase): + """Tests for the _update_display method.""" + + def setUp(self): + """Set up collector with mock display.""" + self.mock_display = MockDisplay(height=40, width=160) + self.collector = LiveStatsCollector( + 1000, pid=12345, display=self.mock_display + ) + self.collector.start_time = time.perf_counter() + + def test_update_display_terminal_too_small(self): + """Test update_display when terminal is too small.""" + small_display = MockDisplay(height=10, width=50) + self.collector.display = small_display + + with mock.patch.object( + self.collector, "_show_terminal_too_small" + ) as mock_show: + self.collector._update_display() + mock_show.assert_called_once() + + def test_update_display_normal(self): + """Test normal update_display operation.""" + self.collector.total_samples = 100 + self.collector.successful_samples = 90 + self.collector.failed_samples = 10 + self.collector.result[("test.py", 10, "func")] = { + "direct_calls": 50, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + + self.collector._update_display() + + self.assertTrue(self.mock_display.cleared) + self.assertTrue(self.mock_display.refreshed) + + def test_update_display_handles_exception(self): + """Test that update_display handles exceptions gracefully.""" + # Make one of the methods raise an exception + with mock.patch.object( + self.collector, + "_prepare_display_data", + side_effect=Exception("Test error"), + ): + # Should not raise an exception (it catches and logs via trace_exception) + try: + self.collector._update_display() + except Exception: + self.fail( + "_update_display should handle exceptions gracefully" + ) + + +class TestLiveCollectorWithMockDisplayHelpers(unittest.TestCase): + """Tests using the new MockDisplay helper methods.""" + + def test_verify_pid_display_with_contains(self): + """Test verifying PID is displayed using contains_text helper.""" + display = MockDisplay(height=40, width=160) + collector = LiveStatsCollector(1000, pid=99999, display=display) + collector.start_time = time.perf_counter() + collector.total_samples = 10 + + collector._update_display() + + # Use the helper method + self.assertTrue( + display.contains_text("99999"), "PID should be visible in display" + ) + + def test_verify_function_names_displayed(self): + """Test verifying function names appear in display.""" + display = MockDisplay(height=40, width=160) + collector = LiveStatsCollector(1000, pid=12345, display=display) + collector.start_time = time.perf_counter() + + collector.total_samples = 100 + collector.result[("mymodule.py", 42, "my_special_function")] = { + "direct_calls": 50, + "cumulative_calls": 75, + "total_rec_calls": 0, + } + + collector._update_display() + + # Verify function name appears + self.assertTrue( + display.contains_text("my_special_function"), + "Function name should be visible", + ) + + def test_get_all_lines_full_display(self): + """Test getting all 
lines from a full display render.""" + display = MockDisplay(height=40, width=160) + collector = LiveStatsCollector(1000, pid=12345, display=display) + collector.start_time = time.perf_counter() + collector.total_samples = 100 + + collector._update_display() + + lines = display.get_all_lines() + + # Should have multiple lines of content + self.assertGreater(len(lines), 5) + + # Should have header content + self.assertTrue(any("PID" in line for line in lines)) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_modes.py b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py new file mode 100644 index 00000000000..c0457ee7eb8 --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_modes.py @@ -0,0 +1,433 @@ +"""Tests for sampling profiler mode filtering (CPU and GIL modes).""" + +import io +import unittest +from unittest import mock + +try: + import _remote_debugging # noqa: F401 + import profiling.sampling + import profiling.sampling.sample + from profiling.sampling.pstats_collector import PstatsCollector +except ImportError: + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) + +from test.support import requires_subprocess + +from .helpers import test_subprocess +from .mocks import MockFrameInfo, MockInterpreterInfo + + +class TestCpuModeFiltering(unittest.TestCase): + """Test CPU mode filtering functionality (--mode=cpu).""" + + def test_mode_validation(self): + """Test that CLI validates mode choices correctly.""" + # Invalid mode choice should raise SystemExit + test_args = [ + "profiling.sampling.cli", + "attach", + "12345", + "--mode", + "invalid", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("sys.stderr", io.StringIO()) as mock_stderr, + self.assertRaises(SystemExit) as cm, + ): + from profiling.sampling.cli import main + main() + + self.assertEqual(cm.exception.code, 2) # argparse error + error_msg = mock_stderr.getvalue() + self.assertIn("invalid choice", error_msg) + + def test_frames_filtered_with_skip_idle(self): + """Test that frames are actually filtered when skip_idle=True.""" + # Import thread status flags + try: + from _remote_debugging import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + ) + except ImportError: + THREAD_STATUS_HAS_GIL = 1 << 0 + THREAD_STATUS_ON_CPU = 1 << 1 + + # Create mock frames with different thread statuses + class MockThreadInfoWithStatus: + def __init__(self, thread_id, frame_info, status): + self.thread_id = thread_id + self.frame_info = frame_info + self.status = status + + # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread + ACTIVE_STATUS = ( + THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU + ) # Has GIL and on CPU + IDLE_STATUS = 0 # Neither has GIL nor on CPU + + test_frames = [ + MockInterpreterInfo( + 0, + [ + MockThreadInfoWithStatus( + 1, + [MockFrameInfo("active1.py", 10, "active_func1")], + ACTIVE_STATUS, + ), + MockThreadInfoWithStatus( + 2, + [MockFrameInfo("idle.py", 20, "idle_func")], + IDLE_STATUS, + ), + MockThreadInfoWithStatus( + 3, + [MockFrameInfo("active2.py", 30, "active_func2")], + ACTIVE_STATUS, + ), + ], + ) + ] + + # Test with skip_idle=True - should only process running threads + collector_skip = PstatsCollector( + sample_interval_usec=1000, skip_idle=True + ) + collector_skip.collect(test_frames) + + # Should only have functions from the active threads (ACTIVE_STATUS) + active1_key = ("active1.py", 10, "active_func1") + active2_key = ("active2.py", 30, "active_func2") + idle_key = ("idle.py", 20, "idle_func") + + self.assertIn(active1_key, collector_skip.result) + self.assertIn(active2_key, collector_skip.result) + self.assertNotIn( + idle_key, collector_skip.result + ) # Idle thread should be filtered out + + # Test with skip_idle=False - should process all threads + collector_no_skip = PstatsCollector( + sample_interval_usec=1000, skip_idle=False + ) + collector_no_skip.collect(test_frames) + + # Should have functions from all threads + self.assertIn(active1_key, collector_no_skip.result) + self.assertIn(active2_key, collector_no_skip.result) + self.assertIn( + idle_key, collector_no_skip.result + ) # Idle thread should be included + + @requires_subprocess() + def test_cpu_mode_integration_filtering(self): + """Integration test: CPU mode should only capture active threads, not idle ones.""" + # Script with one mostly-idle thread and one CPU-active thread + cpu_vs_idle_script = """ +import time +import threading + +cpu_ready = threading.Event() + +def idle_worker(): + time.sleep(999999) + +def cpu_active_worker(): + cpu_ready.set() + x = 1 + while True: + x += 1 + +idle_thread = threading.Thread(target=idle_worker) +cpu_thread = threading.Thread(target=cpu_active_worker) +idle_thread.start() +cpu_thread.start() +cpu_ready.wait() +_test_sock.sendall(b"working") +idle_thread.join() +cpu_thread.join() +""" + with test_subprocess(cpu_vs_idle_script, wait_for_working=True) as subproc: + + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=5000, skip_idle=True) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2.0, + mode=1, # CPU mode + all_threads=True, + ) + collector.print_stats(show_summary=False, mode=1) + except (PermissionError, RuntimeError) as e: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + cpu_mode_output = captured_output.getvalue() + + # Test wall-clock mode (mode=0) - should capture both functions + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=5000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2.0, + mode=0, # Wall-clock mode + all_threads=True, + ) + collector.print_stats(show_summary=False) + except (PermissionError, RuntimeError) as e: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + wall_mode_output = captured_output.getvalue() + + # Verify both modes captured samples + self.assertIn("Captured", cpu_mode_output) + self.assertIn("samples", cpu_mode_output) + self.assertIn("Captured", wall_mode_output) + self.assertIn("samples", wall_mode_output) + + # CPU mode should strongly favor cpu_active_worker over idle_worker + self.assertIn("cpu_active_worker", cpu_mode_output) + self.assertNotIn("idle_worker", cpu_mode_output) + + # Wall-clock mode should capture both types of work + self.assertIn("cpu_active_worker", wall_mode_output) + self.assertIn("idle_worker", wall_mode_output) + + def test_cpu_mode_with_no_samples(self): + """Test that CPU mode degrades gracefully when no samples are collected.""" + # Mock a collector that returns empty stats + mock_collector = PstatsCollector(sample_interval_usec=5000, skip_idle=True) + mock_collector.stats = {} + + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + 
mock.patch( + "profiling.sampling.sample.SampleProfiler" + ) as mock_profiler_class, + ): + mock_profiler = mock.MagicMock() + mock_profiler_class.return_value = mock_profiler + + profiling.sampling.sample.sample( + 12345, # dummy PID + mock_collector, + duration_sec=0.5, + mode=1, # CPU mode + all_threads=True, + ) + + mock_collector.print_stats(show_summary=False, mode=1) + + output = captured_output.getvalue() + + # Should see the "No samples were collected" message + self.assertIn("No samples were collected", output) + self.assertIn("CPU mode", output) + + +class TestGilModeFiltering(unittest.TestCase): + """Test GIL mode filtering functionality (--mode=gil).""" + + def test_gil_mode_validation(self): + """Test that CLI accepts gil mode choice correctly.""" + from profiling.sampling.cli import main + + test_args = [ + "profiling.sampling.cli", + "attach", + "12345", + "--mode", + "gil", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + try: + main() + except (SystemExit, OSError, RuntimeError): + pass # Expected due to invalid PID + + # Should have attempted to call sample with mode=2 (GIL mode) + mock_sample.assert_called_once() + call_args = mock_sample.call_args + # Check the mode parameter (should be in kwargs) + self.assertEqual(call_args.kwargs.get("mode"), 2) # PROFILING_MODE_GIL + + def test_gil_mode_sample_function_call(self): + """Test that sample() function correctly uses GIL mode.""" + with ( + mock.patch( + "profiling.sampling.sample.SampleProfiler" + ) as mock_profiler, + ): + # Mock the profiler instance + mock_instance = mock.Mock() + mock_profiler.return_value = mock_instance + + # Create a real collector instance + collector = PstatsCollector(sample_interval_usec=1000, skip_idle=True) + + # Call sample with GIL mode + profiling.sampling.sample.sample( + 12345, + collector, + mode=2, # PROFILING_MODE_GIL + duration_sec=1, + ) + + # Verify SampleProfiler was created with correct mode + mock_profiler.assert_called_once() + call_args = mock_profiler.call_args + self.assertEqual(call_args[1]["mode"], 2) # mode parameter + + # Verify profiler.sample was called + mock_instance.sample.assert_called_once() + + def test_gil_mode_cli_argument_parsing(self): + """Test CLI argument parsing for GIL mode with various options.""" + from profiling.sampling.cli import main + + test_args = [ + "profiling.sampling.cli", + "attach", + "12345", + "--mode", + "gil", + "-i", + "500", + "-d", + "5", + ] + + with ( + mock.patch("sys.argv", test_args), + mock.patch("profiling.sampling.cli.sample") as mock_sample, + ): + try: + main() + except (SystemExit, OSError, RuntimeError): + pass # Expected due to invalid PID + + # Verify all arguments were parsed correctly + mock_sample.assert_called_once() + call_args = mock_sample.call_args + self.assertEqual(call_args.kwargs.get("mode"), 2) # GIL mode + self.assertEqual(call_args.kwargs.get("duration_sec"), 5) + + @requires_subprocess() + def test_gil_mode_integration_behavior(self): + """Integration test: GIL mode should capture GIL-holding threads.""" + # Create a test script with GIL-releasing operations + gil_test_script = """ +import time +import threading + +gil_ready = threading.Event() + +def gil_releasing_work(): + time.sleep(999999) + +def gil_holding_work(): + gil_ready.set() + x = 1 + while True: + x += 1 + +idle_thread = threading.Thread(target=gil_releasing_work) +cpu_thread = threading.Thread(target=gil_holding_work) +idle_thread.start() +cpu_thread.start() 
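+# gil_ready is set from inside gil_holding_work, so waiting on it guarantees
+# the GIL-holding thread has started before readiness is reported.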
+gil_ready.wait() +_test_sock.sendall(b"working") +idle_thread.join() +cpu_thread.join() +""" + with test_subprocess(gil_test_script, wait_for_working=True) as subproc: + + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=5000, skip_idle=True) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=2.0, + mode=2, # GIL mode + all_threads=True, + ) + collector.print_stats(show_summary=False) + except (PermissionError, RuntimeError) as e: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + gil_mode_output = captured_output.getvalue() + + # Test wall-clock mode for comparison + with ( + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + collector = PstatsCollector(sample_interval_usec=5000, skip_idle=False) + profiling.sampling.sample.sample( + subproc.process.pid, + collector, + duration_sec=0.5, + mode=0, # Wall-clock mode + all_threads=True, + ) + collector.print_stats(show_summary=False) + except (PermissionError, RuntimeError) as e: + self.skipTest( + "Insufficient permissions for remote profiling" + ) + + wall_mode_output = captured_output.getvalue() + + # GIL mode should primarily capture GIL-holding work + # (Note: actual behavior depends on threading implementation) + self.assertIn("gil_holding_work", gil_mode_output) + + # Wall-clock mode should capture both types of work + self.assertIn("gil_holding_work", wall_mode_output) + + def test_mode_constants_are_defined(self): + """Test that all profiling mode constants are properly defined.""" + self.assertEqual(profiling.sampling.sample.PROFILING_MODE_WALL, 0) + self.assertEqual(profiling.sampling.sample.PROFILING_MODE_CPU, 1) + self.assertEqual(profiling.sampling.sample.PROFILING_MODE_GIL, 2) + + def test_parse_mode_function(self): + """Test the _parse_mode function with all valid modes.""" + from profiling.sampling.cli import _parse_mode + self.assertEqual(_parse_mode("wall"), 0) + self.assertEqual(_parse_mode("cpu"), 1) + self.assertEqual(_parse_mode("gil"), 2) + + # Test invalid mode raises KeyError + with self.assertRaises(KeyError): + _parse_mode("invalid") diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_profiler.py b/Lib/test/test_profiling/test_sampling_profiler/test_profiler.py new file mode 100644 index 00000000000..822f559561e --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_profiler.py @@ -0,0 +1,714 @@ +"""Tests for sampling profiler core functionality.""" + +import io +from unittest import mock +import unittest + +try: + import _remote_debugging # noqa: F401 + from profiling.sampling.sample import SampleProfiler + from profiling.sampling.pstats_collector import PstatsCollector +except ImportError: + raise unittest.SkipTest( + "Test only runs when _remote_debugging is available" + ) + +from test.support import force_not_colorized_test_class + + +def print_sampled_stats(stats, sort=-1, limit=None, show_summary=True, sample_interval_usec=100): + """Helper function to maintain compatibility with old test API. + + This wraps the new PstatsCollector.print_stats() API to work with the + existing test infrastructure. 
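+
+    Note: ``stats`` only needs a ``.stats`` mapping in the pstats shape,
+    ``{(file, line, func): (cc, nc, tt, ct, callers)}``; the tests below
+    pass a MagicMock carrying that attribute.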
+ """ + # Create a mock collector that populates stats correctly + collector = PstatsCollector(sample_interval_usec=sample_interval_usec) + + # Override create_stats to populate self.stats with the provided stats + def mock_create_stats(): + collector.stats = stats.stats + collector.create_stats = mock_create_stats + + # Call the new print_stats method + collector.print_stats(sort=sort, limit=limit, show_summary=show_summary) + + +class TestSampleProfiler(unittest.TestCase): + """Test the SampleProfiler class.""" + + def test_sample_profiler_initialization(self): + """Test SampleProfiler initialization with various parameters.""" + + # Mock RemoteUnwinder to avoid permission issues + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock.MagicMock() + + # Test basic initialization + profiler = SampleProfiler( + pid=12345, sample_interval_usec=1000, all_threads=False + ) + self.assertEqual(profiler.pid, 12345) + self.assertEqual(profiler.sample_interval_usec, 1000) + self.assertEqual(profiler.all_threads, False) + + # Test with all_threads=True + profiler = SampleProfiler( + pid=54321, sample_interval_usec=5000, all_threads=True + ) + self.assertEqual(profiler.pid, 54321) + self.assertEqual(profiler.sample_interval_usec, 5000) + self.assertEqual(profiler.all_threads, True) + + def test_sample_profiler_sample_method_timing(self): + """Test that the sample method respects duration and handles timing correctly.""" + + # Mock the unwinder to avoid needing a real process + mock_unwinder = mock.MagicMock() + mock_unwinder.get_stack_trace.return_value = [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ] + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, sample_interval_usec=100000, all_threads=False + ) # 100ms interval + + # Mock collector + mock_collector = mock.MagicMock() + + # Mock time to control the sampling loop + start_time = 1000.0 + times = [ + start_time + i * 0.1 for i in range(12) + ] # 0, 0.1, 0.2, ..., 1.1 seconds + + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + profiler.sample(mock_collector, duration_sec=1) + + result = output.getvalue() + + # Should have captured approximately 10 samples (1 second / 0.1 second interval) + self.assertIn("Captured", result) + self.assertIn("samples", result) + + # Verify collector was called multiple times + self.assertGreaterEqual(mock_collector.collect.call_count, 5) + self.assertLessEqual(mock_collector.collect.call_count, 11) + + def test_sample_profiler_error_handling(self): + """Test that the sample method handles errors gracefully.""" + + # Mock unwinder that raises errors + mock_unwinder = mock.MagicMock() + error_sequence = [ + RuntimeError("Process died"), + [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ], + UnicodeDecodeError("utf-8", b"", 0, 1, "invalid"), + [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", + lineno=20, + funcname="test_func2", + ) + ], + ) + ], + OSError("Permission denied"), + ] + mock_unwinder.get_stack_trace.side_effect = error_sequence + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + + profiler = SampleProfiler( + pid=12345, 
sample_interval_usec=10000, all_threads=False + ) + + mock_collector = mock.MagicMock() + + # Control timing to run exactly 5 samples + times = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06] + + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + profiler.sample(mock_collector, duration_sec=0.05) + + result = output.getvalue() + + # Should report error rate + self.assertIn("Error rate:", result) + self.assertIn("%", result) + + # Collector should have been called only for successful samples (should be > 0) + self.assertGreater(mock_collector.collect.call_count, 0) + self.assertLessEqual(mock_collector.collect.call_count, 3) + + def test_sample_profiler_missed_samples_warning(self): + """Test that the profiler warns about missed samples when sampling is too slow.""" + + mock_unwinder = mock.MagicMock() + mock_unwinder.get_stack_trace.return_value = [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ] + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + + # Use very short interval that we'll miss + profiler = SampleProfiler( + pid=12345, sample_interval_usec=1000, all_threads=False + ) # 1ms interval + + mock_collector = mock.MagicMock() + + # Simulate slow sampling where we miss many samples + times = [ + 0.0, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + ] # Extra time points to avoid StopIteration + + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + profiler.sample(mock_collector, duration_sec=0.5) + + result = output.getvalue() + + # Should warn about missed samples + self.assertIn("Warning: missed", result) + self.assertIn("samples from the expected total", result) + + def test_sample_profiler_keyboard_interrupt(self): + mock_unwinder = mock.MagicMock() + mock_unwinder.get_stack_trace.side_effect = [ + [ + ( + 1, + [ + mock.MagicMock( + filename="test.py", lineno=10, funcname="test_func" + ) + ], + ) + ], + KeyboardInterrupt(), + ] + + with mock.patch( + "_remote_debugging.RemoteUnwinder" + ) as mock_unwinder_class: + mock_unwinder_class.return_value = mock_unwinder + profiler = SampleProfiler( + pid=12345, sample_interval_usec=10000, all_threads=False + ) + mock_collector = mock.MagicMock() + times = [0.0, 0.01, 0.02, 0.03, 0.04] + with mock.patch("time.perf_counter", side_effect=times): + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + try: + profiler.sample(mock_collector, duration_sec=1.0) + except KeyboardInterrupt: + self.fail( + "KeyboardInterrupt was not handled by the profiler" + ) + result = output.getvalue() + self.assertIn("Interrupted by user.", result) + self.assertIn("Captured", result) + self.assertIn("samples", result) + self.assertNotIn("Warning: missed", result) + + +@force_not_colorized_test_class +class TestPrintSampledStats(unittest.TestCase): + """Test the print_sampled_stats function.""" + + def setUp(self): + """Set up test data.""" + # Mock stats data + self.mock_stats = mock.MagicMock() + self.mock_stats.stats = { + ("file1.py", 10, "func1"): ( + 100, + 100, + 0.5, + 0.5, + {}, + ), # cc, nc, tt, ct, callers + ("file2.py", 20, "func2"): (50, 50, 0.25, 0.3, {}), + ("file3.py", 30, "func3"): (200, 200, 1.5, 2.0, {}), + ("file4.py", 40, "func4"): ( + 10, + 10, + 0.001, + 0.001, + {}, + ), # millisecond range + ("file5.py", 50, "func5"): ( + 5, 
+ 5, + 0.000001, + 0.000002, + {}, + ), # microsecond range + } + + def test_print_sampled_stats_basic(self): + """Test basic print_sampled_stats functionality.""" + + # Capture output + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(self.mock_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Check header is present + self.assertIn("Profile Stats:", result) + self.assertIn("nsamples", result) + self.assertIn("tottime", result) + self.assertIn("cumtime", result) + + # Check functions are present + self.assertIn("func1", result) + self.assertIn("func2", result) + self.assertIn("func3", result) + + def test_print_sampled_stats_sorting(self): + """Test different sorting options.""" + + # Test sort by calls + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=0, sample_interval_usec=100 + ) + + result = output.getvalue() + lines = result.strip().split("\n") + + # Find the data lines (skip header) + data_lines = [l for l in lines if "file" in l and ".py" in l] + # func3 should be first (200 calls) + self.assertIn("func3", data_lines[0]) + + # Test sort by time + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=1, sample_interval_usec=100 + ) + + result = output.getvalue() + lines = result.strip().split("\n") + + data_lines = [l for l in lines if "file" in l and ".py" in l] + # func3 should be first (1.5s time) + self.assertIn("func3", data_lines[0]) + + def test_print_sampled_stats_limit(self): + """Test limiting output rows.""" + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, limit=2, sample_interval_usec=100 + ) + + result = output.getvalue() + + # Count function entries in the main stats section (not in summary) + lines = result.split("\n") + # Find where the main stats section ends (before summary) + main_section_lines = [] + for line in lines: + if "Summary of Interesting Functions:" in line: + break + main_section_lines.append(line) + + # Count function entries only in main section + func_count = sum( + 1 + for line in main_section_lines + if "func" in line and ".py" in line + ) + self.assertEqual(func_count, 2) + + def test_print_sampled_stats_time_units(self): + """Test proper time unit selection.""" + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(self.mock_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should use seconds for the header since max time is > 1s + self.assertIn("tottime (s)", result) + self.assertIn("cumtime (s)", result) + + # Test with only microsecond-range times + micro_stats = mock.MagicMock() + micro_stats.stats = { + ("file1.py", 10, "func1"): (100, 100, 0.000005, 0.000010, {}), + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(micro_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should use microseconds + self.assertIn("tottime (μs)", result) + self.assertIn("cumtime (μs)", result) + + def test_print_sampled_stats_summary(self): + """Test summary section generation.""" + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, + show_summary=True, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Check summary sections are present + self.assertIn("Summary of Interesting Functions:", 
result) + self.assertIn( + "Functions with Highest Direct/Cumulative Ratio (Hot Spots):", + result, + ) + self.assertIn( + "Functions with Highest Call Frequency (Indirect Calls):", result + ) + self.assertIn( + "Functions with Highest Call Magnification (Cumulative/Direct):", + result, + ) + + def test_print_sampled_stats_no_summary(self): + """Test disabling summary output.""" + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, + show_summary=False, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Summary should not be present + self.assertNotIn("Summary of Interesting Functions:", result) + + def test_print_sampled_stats_empty_stats(self): + """Test with empty stats.""" + + empty_stats = mock.MagicMock() + empty_stats.stats = {} + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(empty_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should print message about no samples + self.assertIn("No samples were collected.", result) + + def test_print_sampled_stats_sample_percentage_sorting(self): + """Test sample percentage sorting options.""" + + # Add a function with high sample percentage (more direct calls than func3's 200) + self.mock_stats.stats[("expensive.py", 60, "expensive_func")] = ( + 300, # direct calls (higher than func3's 200) + 300, # cumulative calls + 1.0, # total time + 1.0, # cumulative time + {}, + ) + + # Test sort by sample percentage + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=3, sample_interval_usec=100 + ) # sample percentage + + result = output.getvalue() + lines = result.strip().split("\n") + + data_lines = [l for l in lines if ".py" in l and "func" in l] + # expensive_func should be first (highest sample percentage) + self.assertIn("expensive_func", data_lines[0]) + + def test_print_sampled_stats_with_recursive_calls(self): + """Test print_sampled_stats with recursive calls where nc != cc.""" + + # Create stats with recursive calls (nc != cc) + recursive_stats = mock.MagicMock() + recursive_stats.stats = { + # (direct_calls, cumulative_calls, tt, ct, callers) - recursive function + ("recursive.py", 10, "factorial"): ( + 5, # direct_calls + 10, # cumulative_calls (appears more times in stack due to recursion) + 0.5, + 0.6, + {}, + ), + ("normal.py", 20, "normal_func"): ( + 3, # direct_calls + 3, # cumulative_calls (same as direct for non-recursive) + 0.2, + 0.2, + {}, + ), + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(recursive_stats, sample_interval_usec=100) + + result = output.getvalue() + + # Should display recursive calls as "5/10" format + self.assertIn("5/10", result) # nc/cc format for recursive calls + self.assertIn("3", result) # just nc for non-recursive calls + self.assertIn("factorial", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_with_zero_call_counts(self): + """Test print_sampled_stats with zero call counts to trigger division protection.""" + + # Create stats with zero call counts + zero_stats = mock.MagicMock() + zero_stats.stats = { + ("file.py", 10, "zero_calls"): (0, 0, 0.0, 0.0, {}), # Zero calls + ("file.py", 20, "normal_func"): ( + 5, + 5, + 0.1, + 0.1, + {}, + ), # Normal function + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats(zero_stats, sample_interval_usec=100) + + result = 
output.getvalue() + + # Should handle zero call counts gracefully + self.assertIn("zero_calls", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_sort_by_name(self): + """Test sort by function name option.""" + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + self.mock_stats, sort=-1, sample_interval_usec=100 + ) # sort by name + + result = output.getvalue() + lines = result.strip().split("\n") + + # Find the data lines (skip header and summary) + # Data lines start with whitespace and numbers, and contain filename:lineno(function) + data_lines = [] + for line in lines: + # Skip header lines and summary sections + if ( + line.startswith(" ") + and "(" in line + and ")" in line + and not line.startswith( + " 1." + ) # Skip summary lines that start with times + and not line.startswith( + " 0." + ) # Skip summary lines that start with times + and "per call" not in line # Skip summary lines + and "calls" not in line # Skip summary lines + and "total time" not in line # Skip summary lines + and "cumulative time" not in line + ): # Skip summary lines + data_lines.append(line) + + # Extract just the function names for comparison + func_names = [] + import re + + for line in data_lines: + # Function name is between the last ( and ), accounting for ANSI color codes + match = re.search(r"\(([^)]+)\)$", line) + if match: + func_name = match.group(1) + # Remove ANSI color codes + func_name = re.sub(r"\x1b\[[0-9;]*m", "", func_name) + func_names.append(func_name) + + # Verify we extracted function names and they are sorted + self.assertGreater( + len(func_names), 0, "Should have extracted some function names" + ) + self.assertEqual( + func_names, + sorted(func_names), + f"Function names {func_names} should be sorted alphabetically", + ) + + def test_print_sampled_stats_with_zero_time_functions(self): + """Test summary sections with functions that have zero time.""" + + # Create stats with zero-time functions + zero_time_stats = mock.MagicMock() + zero_time_stats.stats = { + ("file1.py", 10, "zero_time_func"): ( + 5, + 5, + 0.0, + 0.0, + {}, + ), # Zero time + ("file2.py", 20, "normal_func"): ( + 3, + 3, + 0.1, + 0.1, + {}, + ), # Normal time + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + zero_time_stats, + show_summary=True, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Should handle zero-time functions gracefully in summary + self.assertIn("Summary of Interesting Functions:", result) + self.assertIn("zero_time_func", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_with_malformed_qualified_names(self): + """Test summary generation with function names that don't contain colons.""" + + # Create stats with function names that would create malformed qualified names + malformed_stats = mock.MagicMock() + malformed_stats.stats = { + # Function name without clear module separation + ("no_colon_func", 10, "func"): (3, 3, 0.1, 0.1, {}), + ("", 20, "empty_filename_func"): (2, 2, 0.05, 0.05, {}), + ("normal.py", 30, "normal_func"): (5, 5, 0.2, 0.2, {}), + } + + with io.StringIO() as output: + with mock.patch("sys.stdout", output): + print_sampled_stats( + malformed_stats, + show_summary=True, + sample_interval_usec=100, + ) + + result = output.getvalue() + + # Should handle malformed names gracefully in summary aggregation + self.assertIn("Summary of Interesting Functions:", result) + # 
All function names should appear somewhere in the output + self.assertIn("func", result) + self.assertIn("empty_filename_func", result) + self.assertIn("normal_func", result) + + def test_print_sampled_stats_with_recursive_call_stats_creation(self): + """Test create_stats with recursive call data to trigger total_rec_calls branch.""" + collector = PstatsCollector(sample_interval_usec=1000000) # 1 second + + # Simulate recursive function data where total_rec_calls would be set + # We need to manually manipulate the collector result to test this branch + collector.result = { + ("recursive.py", 10, "factorial"): { + "total_rec_calls": 3, # Non-zero recursive calls + "direct_calls": 5, + "cumulative_calls": 10, + }, + ("normal.py", 20, "normal_func"): { + "total_rec_calls": 0, # Zero recursive calls + "direct_calls": 2, + "cumulative_calls": 5, + }, + } + + collector.create_stats() + + # Check that recursive calls are handled differently from non-recursive + factorial_stats = collector.stats[("recursive.py", 10, "factorial")] + normal_stats = collector.stats[("normal.py", 20, "normal_func")] + + # factorial should use cumulative_calls (10) as nc + self.assertEqual( + factorial_stats[1], 10 + ) # nc should be cumulative_calls + self.assertEqual(factorial_stats[0], 5) # cc should be direct_calls + + # normal_func should use cumulative_calls as nc + self.assertEqual(normal_stats[1], 5) # nc should be cumulative_calls + self.assertEqual(normal_stats[0], 2) # cc should be direct_calls diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_trend_tracker.py b/Lib/test/test_profiling/test_sampling_profiler/test_trend_tracker.py new file mode 100644 index 00000000000..43b23be0fe3 --- /dev/null +++ b/Lib/test/test_profiling/test_sampling_profiler/test_trend_tracker.py @@ -0,0 +1,97 @@ +"""Simple unit tests for TrendTracker.""" + +import unittest +from test.support import requires +from test.support.import_helper import import_module + +# Only run these tests if curses is available +requires("curses") +curses = import_module("curses") + +from profiling.sampling.live_collector.trend_tracker import TrendTracker + + +class TestTrendTracker(unittest.TestCase): + """Tests for TrendTracker class.""" + + def setUp(self): + """Set up test fixtures.""" + self.colors = { + "trend_up": curses.A_BOLD, + "trend_down": curses.A_REVERSE, + "trend_stable": curses.A_NORMAL, + } + + def test_basic_trend_detection(self): + """Test basic up/down/stable trend detection.""" + tracker = TrendTracker(self.colors, enabled=True) + + # First value is always stable + self.assertEqual(tracker.update("func1", "nsamples", 10), "stable") + + # Increasing value + self.assertEqual(tracker.update("func1", "nsamples", 20), "up") + + # Decreasing value + self.assertEqual(tracker.update("func1", "nsamples", 15), "down") + + # Small change (within threshold) is stable + self.assertEqual(tracker.update("func1", "nsamples", 15.0001), "stable") + + def test_multiple_metrics(self): + """Test tracking multiple metrics simultaneously.""" + tracker = TrendTracker(self.colors, enabled=True) + + trends = tracker.update_metrics("func1", { + "nsamples": 10, + "tottime": 5.0, + }) + + self.assertEqual(trends["nsamples"], "stable") + self.assertEqual(trends["tottime"], "stable") + + # Update with changes + trends = tracker.update_metrics("func1", { + "nsamples": 15, + "tottime": 3.0, + }) + + self.assertEqual(trends["nsamples"], "up") + self.assertEqual(trends["tottime"], "down") + + def test_toggle_enabled(self): + """Test enable/disable 
toggle.""" + tracker = TrendTracker(self.colors, enabled=True) + self.assertTrue(tracker.enabled) + + tracker.toggle() + self.assertFalse(tracker.enabled) + + # When disabled, should return A_NORMAL + self.assertEqual(tracker.get_color("up"), curses.A_NORMAL) + + def test_get_color(self): + """Test color selection for trends.""" + tracker = TrendTracker(self.colors, enabled=True) + + self.assertEqual(tracker.get_color("up"), curses.A_BOLD) + self.assertEqual(tracker.get_color("down"), curses.A_REVERSE) + self.assertEqual(tracker.get_color("stable"), curses.A_NORMAL) + + def test_clear(self): + """Test clearing tracked values.""" + tracker = TrendTracker(self.colors, enabled=True) + + # Add some data + tracker.update("func1", "nsamples", 10) + tracker.update("func1", "nsamples", 20) + + # Clear + tracker.clear() + + # After clear, first update should be stable + self.assertEqual(tracker.update("func1", "nsamples", 30), "stable") + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_pty.py b/Lib/test/test_pty.py index a2018e86444..7e4f4828ce0 100644 --- a/Lib/test/test_pty.py +++ b/Lib/test/test_pty.py @@ -3,7 +3,6 @@ is_android, is_apple_mobile, is_wasm32, reap_children, verbose, warnings_helper ) from test.support.import_helper import import_module -from test.support.os_helper import TESTFN, unlink # Skip these tests if termios is not available import_module('termios') @@ -299,26 +298,27 @@ def test_master_read(self): @warnings_helper.ignore_fork_in_thread_deprecation_warnings() def test_spawn_doesnt_hang(self): - self.addCleanup(unlink, TESTFN) - with open(TESTFN, 'wb') as f: - STDOUT_FILENO = 1 - dup_stdout = os.dup(STDOUT_FILENO) - os.dup2(f.fileno(), STDOUT_FILENO) - buf = b'' - def master_read(fd): - nonlocal buf - data = os.read(fd, 1024) - buf += data - return data + # gh-140482: Do the test in a pty.fork() child to avoid messing + # with the interactive test runner's terminal settings. + pid, fd = pty.fork() + if pid == pty.CHILD: + pty.spawn([sys.executable, '-c', 'print("hi there")']) + os._exit(0) + + try: + buf = bytearray() try: - pty.spawn([sys.executable, '-c', 'print("hi there")'], - master_read) - finally: - os.dup2(dup_stdout, STDOUT_FILENO) - os.close(dup_stdout) - self.assertEqual(buf, b'hi there\r\n') - with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), b'hi there\r\n') + while (data := os.read(fd, 1024)) != b'': + buf.extend(data) + except OSError as e: + if e.errno != errno.EIO: + raise + + (pid, status) = os.waitpid(pid, 0) + self.assertEqual(status, 0) + self.assertEqual(buf.take_bytes(), b"hi there\r\n") + finally: + os.close(fd) class SmallPtyTests(unittest.TestCase): """These tests don't spawn children or hang.""" diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py index 45192fe5082..3982686dd10 100644 --- a/Lib/test/test_readline.py +++ b/Lib/test/test_readline.py @@ -413,6 +413,24 @@ def test_write_read_limited_history(self): # So, we've only tested that the read did not fail. # See TestHistoryManipulation for the full test. 
+ @unittest.skipUnless(hasattr(readline, "get_pre_input_hook"), + "get_pre_input_hook not available") + def test_get_pre_input_hook(self): + # Save and restore the original hook to avoid side effects + original_hook = readline.get_pre_input_hook() + self.addCleanup(readline.set_pre_input_hook, original_hook) + + # Test that get_pre_input_hook returns None when no hook is set + readline.set_pre_input_hook(None) + self.assertIsNone(readline.get_pre_input_hook()) + + # Set a hook and verify we can retrieve it + def my_hook(): + pass + + readline.set_pre_input_hook(my_hook) + self.assertIs(readline.get_pre_input_hook(), my_hook) + @unittest.skipUnless(support.Py_GIL_DISABLED, 'these tests can only possibly fail with GIL disabled') class FreeThreadingTest(unittest.TestCase): diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 09de32f8371..ebdf5455163 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -5613,7 +5613,7 @@ def test_tlsalerttype(self): class Checked_TLSAlertType(enum.IntEnum): """Alert types for TLSContentType.ALERT messages - See RFC 8466, section B.2 + See RFC 8446, section B.2 """ CLOSE_NOTIFY = 0 UNEXPECTED_MESSAGE = 10 diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 7167646ecc6..2e93ac08f82 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -165,6 +165,7 @@ SYMBOL_NAMES = ( "PyDict_MergeFromSeq2", "PyDict_New", "PyDict_Next", + "PyDict_SetDefaultRef", "PyDict_SetItem", "PyDict_SetItemString", "PyDict_Size", @@ -900,6 +901,7 @@ SYMBOL_NAMES = ( "Py_GetRecursionLimit", "Py_GetVersion", "Py_HasFileSystemDefaultEncoding", + "Py_IS_TYPE", "Py_IncRef", "Py_Initialize", "Py_InitializeEx", @@ -919,6 +921,8 @@ SYMBOL_NAMES = ( "Py_REFCNT", "Py_ReprEnter", "Py_ReprLeave", + "Py_SET_SIZE", + "Py_SIZE", "Py_SetPath", "Py_SetProgramName", "Py_SetPythonHome", diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index f0e350c71f6..806a1e3fa30 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -957,6 +957,48 @@ def test_communicate(self): self.assertEqual(stdout, b"banana") self.assertEqual(stderr, b"pineapple") + def test_communicate_memoryview_input(self): + # Test memoryview input with byte elements + test_data = b"Hello, memoryview!" 
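+        # memoryview satisfies the buffer protocol, so communicate() should
+        # accept it anywhere bytes input is accepted.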
+ mv = memoryview(test_data) + p = subprocess.Popen([sys.executable, "-c", + 'import sys; sys.stdout.write(sys.stdin.read())'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + self.addCleanup(p.stdout.close) + self.addCleanup(p.stdin.close) + (stdout, stderr) = p.communicate(mv) + self.assertEqual(stdout, test_data) + self.assertIsNone(stderr) + + def test_communicate_memoryview_input_nonbyte(self): + # Test memoryview input with non-byte elements (e.g., int32) + # This tests the fix for gh-134453 where non-byte memoryviews + # had incorrect length tracking on POSIX + import array + # Create an array of 32-bit integers that's large enough to trigger + # the chunked writing behavior (> PIPE_BUF) + pipe_buf = getattr(select, 'PIPE_BUF', 512) + # Each 'i' element is 4 bytes, so we need more than pipe_buf/4 elements + # Add some extra to ensure we exceed the buffer size + num_elements = pipe_buf + 1 + test_array = array.array('i', [0x64306f66 for _ in range(num_elements)]) + expected_bytes = test_array.tobytes() + mv = memoryview(test_array) + + p = subprocess.Popen([sys.executable, "-c", + 'import sys; ' + 'data = sys.stdin.buffer.read(); ' + 'sys.stdout.buffer.write(data)'], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + self.addCleanup(p.stdout.close) + self.addCleanup(p.stdin.close) + (stdout, stderr) = p.communicate(mv) + self.assertEqual(stdout, expected_bytes, + msg=f"{len(stdout)=} =? {len(expected_bytes)=}") + self.assertIsNone(stderr) + def test_communicate_timeout(self): p = subprocess.Popen([sys.executable, "-c", 'import sys,os,time;' @@ -992,6 +1034,62 @@ def test_communicate_timeout_large_output(self): (stdout, _) = p.communicate() self.assertEqual(len(stdout), 4 * 64 * 1024) + def test_communicate_timeout_large_input(self): + # Test that timeout is enforced when writing large input to a + # slow-to-read subprocess, and that partial input is preserved + # for continuation after timeout (gh-141473). + # + # This is a regression test for Windows matching POSIX behavior. + # On POSIX, select() is used to multiplex I/O with timeout checking. + # On Windows, stdin writing must also honor the timeout rather than + # blocking indefinitely when the pipe buffer fills. + + # Input larger than typical pipe buffer (4-64KB on Windows) + input_data = b"x" * (128 * 1024) + + p = subprocess.Popen( + [sys.executable, "-c", + "import sys, time; " + "time.sleep(30); " # Don't read stdin for a long time + "sys.stdout.buffer.write(sys.stdin.buffer.read())"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + try: + timeout = 0.2 + start = time.monotonic() + try: + p.communicate(input_data, timeout=timeout) + # If we get here without TimeoutExpired, the timeout was ignored + elapsed = time.monotonic() - start + self.fail( + f"TimeoutExpired not raised. communicate() completed in " + f"{elapsed:.2f}s, but subprocess sleeps for 30s. " + "Stdin writing blocked without enforcing timeout.") + except subprocess.TimeoutExpired: + elapsed = time.monotonic() - start + + # Timeout should occur close to the specified timeout value, + # not after waiting for the subprocess to finish sleeping. + # Allow generous margin for slow CI, but must be well under + # the subprocess sleep time. + self.assertLess(elapsed, 5.0, + f"TimeoutExpired raised after {elapsed:.2f}s; expected ~{timeout}s. " + "Stdin writing blocked without checking timeout.") + + # After timeout, continue communication. The remaining input + # should be sent and we should receive all data back. 
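+            # communicate() may be called again after TimeoutExpired; it
+            # resumes from the partial input recorded by the first call.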
+            stdout, stderr = p.communicate()
+
+            # Verify all input was eventually received by the subprocess
+            self.assertEqual(len(stdout), len(input_data),
+                f"Expected {len(input_data)} bytes output but got {len(stdout)}")
+            self.assertEqual(stdout, input_data)
+        finally:
+            p.kill()
+            p.wait()
+
     # Test for the fd leak reported in http://bugs.python.org/issue2791.
     def test_communicate_pipe_fd_leak(self):
         for stdin_pipe in (False, True):
@@ -1062,6 +1160,19 @@ def test_writes_before_communicate(self):
         self.assertEqual(stdout, b"bananasplit")
         self.assertEqual(stderr, b"")

+    def test_communicate_stdin_closed_before_call(self):
+        # gh-70560, gh-74389: stdin.close() before communicate()
+        # should not raise ValueError from stdin.flush()
+        with subprocess.Popen([sys.executable, "-c",
+                               'import sys; sys.exit(0)'],
+                              stdin=subprocess.PIPE,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.PIPE) as p:
+            p.stdin.close()  # Close stdin before communicate
+            # This should not raise ValueError
+            (stdout, stderr) = p.communicate()
+            self.assertEqual(p.returncode, 0)
+
     def test_universal_newlines_and_text(self):
         args = [
             sys.executable, "-c",
@@ -1643,6 +1754,40 @@ def test_wait_negative_timeout(self):

         self.assertEqual(proc.wait(), 0)

+    def test_post_timeout_communicate_sends_input(self):
+        """GH-141473 regression test; the stdin pipe must close"""
+        with subprocess.Popen(
+            [sys.executable, "-uc", """\
+import sys
+while c := sys.stdin.read(512):
+    sys.stdout.write(c)
+print()
+"""],
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        ) as proc:
+            try:
+                data = f"spam{'#'*4096}beans"
+                proc.communicate(
+                    input=data,
+                    timeout=0,
+                )
+            except subprocess.TimeoutExpired:
+                pass
+            # Prior to the bugfix, this would hang as the stdin
+            # pipe to the child had not been closed.
+            try:
+                stdout, stderr = proc.communicate(timeout=15)
+            except subprocess.TimeoutExpired:
+                self.fail("communicate() hung waiting on a child process that should have seen its stdin pipe close and exited")
+            self.assertEqual(
+                proc.returncode, 0,
+                msg=f"STDERR:\n{stderr}\nSTDOUT:\n{stdout}")
+            self.assertStartsWith(stdout, "spam")
+            self.assertIn("beans", stdout)
+

 class RunFuncTestCase(BaseTestCase):
     def run_python(self, code, **kwargs):
diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py
index d69328a6a6a..667fcc81d8e 100644
--- a/Lib/test/test_support.py
+++ b/Lib/test/test_support.py
@@ -798,10 +798,10 @@ def test_linked_to_musl(self):
         self.assertTrue(linked)
         # The value is cached, so make sure it returns the same value again.
         self.assertIs(linked, support.linked_to_musl())
-        # The unlike libc, the musl version is a triple.
+        # The musl version is either a triple or just a major version number.
if linked: self.assertIsInstance(linked, tuple) - self.assertEqual(3, len(linked)) + self.assertIn(len(linked), (1, 3)) for v in linked: self.assertIsInstance(v, int) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 2f169c1165d..04018e9603f 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -730,14 +730,12 @@ def test_thread_info(self): self.assertEqual(len(info), 3) self.assertIn(info.name, ('nt', 'pthread', 'pthread-stubs', 'solaris', None)) self.assertIn(info.lock, ('pymutex', None)) - if sys.platform.startswith(("linux", "android", "freebsd")): + if sys.platform.startswith(("linux", "android", "freebsd", "wasi")): self.assertEqual(info.name, "pthread") elif sys.platform == "win32": self.assertEqual(info.name, "nt") elif sys.platform == "emscripten": self.assertIn(info.name, {"pthread", "pthread-stubs"}) - elif sys.platform == "wasi": - self.assertEqual(info.name, "pthread-stubs") def test_abi_info(self): info = sys.abi_info @@ -2256,7 +2254,7 @@ def frame_3_jit() -> None: # 1 extra iteration for tracing. for i in range(_testinternalcapi.TIER2_THRESHOLD + 2): # Careful, doing this in the reverse order breaks tracing: - expected = {enabled} and i >= _testinternalcapi.TIER2_THRESHOLD + 1 + expected = {enabled} and i >= _testinternalcapi.TIER2_THRESHOLD assert sys._jit.is_active() is expected frame_2_jit(expected) assert sys._jit.is_active() is expected diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 8101657b04a..502103ce629 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -20,7 +20,7 @@ ) from test.support.import_helper import import_module from test.support.os_helper import (TESTFN, unlink, skip_unless_symlink, - change_cwd) + change_cwd, EnvironmentVarGuard) from test.support.venv import VirtualEnvironmentMixin import sysconfig @@ -807,7 +807,9 @@ def test_parse_makefile_renamed_vars(self): print("PY_LDFLAGS=-lm", file=makefile) print("var2=$(LDFLAGS)", file=makefile) print("var3=$(CPPFLAGS)", file=makefile) - vars = _parse_makefile(TESTFN) + with EnvironmentVarGuard() as env: + env.clear() + vars = _parse_makefile(TESTFN) self.assertEqual(vars, { 'var1': '-Wall', 'CFLAGS': '-Wall', diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index bf57867a871..d107ad92594 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -1784,6 +1784,23 @@ def test_keyword_suggestions_from_command_string(self): stderr_text = stderr.decode('utf-8') self.assertIn(f"Did you mean '{expected_kw}'", stderr_text) + def test_no_keyword_suggestion_for_comma_errors(self): + # When the parser identifies a missing comma, don't suggest + # bogus keyword replacements like 'print' -> 'not' + code = '''\ +import sys +print( + "line1" + "line2" + file=sys.stderr +) +''' + source = textwrap.dedent(code).strip() + rc, stdout, stderr = assert_python_failure('-c', source) + stderr_text = stderr.decode('utf-8') + self.assertIn("Perhaps you forgot a comma", stderr_text) + self.assertNotIn("Did you mean", stderr_text) + @requires_debug_ranges() @force_not_colorized_test_class class PurePythonTracebackErrorCaretTests( @@ -5051,7 +5068,7 @@ def test_no_site_package_flavour(self): b"or to enable your virtual environment?"), stderr ) - def test_missing_stdlib_package(self): + def test_missing_stdlib_module(self): code = """ import sys sys.stdlib_module_names |= {'spam'} @@ -5061,6 +5078,27 @@ def test_missing_stdlib_package(self): self.assertIn(b"Standard library module 'spam' was not found", stderr) + code = """ + 
import sys + import traceback + traceback._MISSING_STDLIB_MODULE_MESSAGES = {'spam': "Install 'spam4life' for 'spam'"} + sys.stdlib_module_names |= {'spam'} + import spam + """ + _, _, stderr = assert_python_failure('-S', '-c', code) + + self.assertIn(b"Install 'spam4life' for 'spam'", stderr) + + @unittest.skipIf(sys.platform == "win32", "Non-Windows test") + def test_windows_only_module_error(self): + try: + import msvcrt # noqa: F401 + except ModuleNotFoundError: + formatted = traceback.format_exc() + self.assertIn("Unsupported platform for Windows-only standard library module 'msvcrt'", formatted) + else: + self.fail("ModuleNotFoundError was not raised") + class TestColorizedTraceback(unittest.TestCase): maxDiff = None diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 1edb5dde998..6887a5e5cc4 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2531,6 +2531,10 @@ def test_decompress_without_3rd_party_library(self): @requires_zlib() def test_full_overlap_different_names(self): + # The ZIP file contains two central directory entries with + # different names which refer to the same local header. + # The name of the local header matches the name of the first + # central directory entry. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00b\xed' @@ -2560,6 +2564,10 @@ def test_full_overlap_different_names(self): @requires_zlib() def test_full_overlap_different_names2(self): + # The ZIP file contains two central directory entries with + # different names which refer to the same local header. + # The name of the local header matches the name of the second + # central directory entry. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' @@ -2591,6 +2599,8 @@ def test_full_overlap_different_names2(self): @requires_zlib() def test_full_overlap_same_name(self): + # The ZIP file contains two central directory entries with + # the same name which refer to the same local header. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' @@ -2623,6 +2633,8 @@ def test_full_overlap_same_name(self): @requires_zlib() def test_quoted_overlap(self): + # The ZIP file contains two files. The second local header + # is contained in the range of the first file. data = ( b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' @@ -2654,6 +2666,7 @@ def test_quoted_overlap(self): @requires_zlib() def test_overlap_with_central_dir(self): + # The local header offset is equal to the central directory offset. data = ( b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' @@ -2668,11 +2681,15 @@ def test_overlap_with_central_dir(self): self.assertEqual(zi.header_offset, 0) self.assertEqual(zi.compress_size, 11) self.assertEqual(zi.file_size, 1033) + # Found central directory signature PK\x01\x02 instead of + # local header signature PK\x03\x04. with self.assertRaisesRegex(zipfile.BadZipFile, 'Bad magic number'): zipf.read('a') @requires_zlib() def test_overlap_with_archive_comment(self): + # The local header is written after the central directory, + # in the archive comment. 
data = ( b'PK\x01\x02\x14\x03\x14\x00\x00\x00\x08\x00G_|Z' b'\xe2\x1e8\xbb\x0b\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00' diff --git a/Lib/traceback.py b/Lib/traceback.py index 9b4b8c7d566..c1052adeed2 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -14,6 +14,11 @@ from contextlib import suppress +try: + from _missing_stdlib_info import _MISSING_STDLIB_MODULE_MESSAGES +except ImportError: + _MISSING_STDLIB_MODULE_MESSAGES = {} + __all__ = ['extract_stack', 'extract_tb', 'format_exception', 'format_exception_only', 'format_list', 'format_stack', 'format_tb', 'print_exc', 'format_exc', 'print_exception', @@ -1110,7 +1115,11 @@ def __init__(self, exc_type, exc_value, exc_traceback, *, limit=None, elif exc_type and issubclass(exc_type, ModuleNotFoundError): module_name = getattr(exc_value, "name", None) if module_name in sys.stdlib_module_names: - self._str = f"Standard library module '{module_name}' was not found" + message = _MISSING_STDLIB_MODULE_MESSAGES.get( + module_name, + f"Standard library module {module_name!r} was not found" + ) + self._str = message elif sys.flags.no_site: self._str += (". Site initialization is disabled, did you forget to " + "add the site-packages directory to sys.path " @@ -1331,6 +1340,15 @@ def _find_keyword_typos(self): if len(error_code) > 1024: return + # If the original code doesn't raise SyntaxError, we can't validate + # that a keyword replacement actually fixes anything + try: + codeop.compile_command(error_code, symbol="exec", flags=codeop.PyCF_ONLY_AST) + except SyntaxError: + pass # Good - the original code has a syntax error we might fix + else: + return # Original code compiles or is incomplete - can't validate fixes + error_lines = error_code.splitlines() tokens = tokenize.generate_tokens(io.StringIO(error_code).readline) tokens_left_to_process = 10 diff --git a/Lib/wave.py b/Lib/wave.py index 056bd6aab7f..25ca9ef168e 100644 --- a/Lib/wave.py +++ b/Lib/wave.py @@ -97,7 +97,7 @@ def _byteswap(data, width): for j in range(width): swapped_data[i + width - 1 - j] = data[i + j] - return bytes(swapped_data) + return swapped_data.take_bytes() class _Chunk: diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index db51f350ea0..0a2ccc00f18 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -292,13 +292,6 @@ def _append_child(self, node): childNodes.append(node) node.parentNode = self -def _in_document(node): - # return True iff node is part of a document tree - while node is not None: - if node.nodeType == Node.DOCUMENT_NODE: - return True - node = node.parentNode - return False def _write_data(writer, text, attr): "Writes datachars to writer." 
@@ -1555,7 +1548,7 @@ def _clear_id_cache(node): if node.nodeType == Node.DOCUMENT_NODE: node._id_cache.clear() node._id_search_stack = None - elif _in_document(node): + elif node.ownerDocument: node.ownerDocument._id_cache.clear() node.ownerDocument._id_search_stack= None diff --git a/Makefile.pre.in b/Makefile.pre.in index dd28ff5d2a3..f3086ec1462 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1604,6 +1604,11 @@ sharedmods: $(SHAREDMODS) pybuilddir.txt # dependency on BUILDPYTHON ensures that the target is run last .PHONY: checksharedmods checksharedmods: sharedmods $(PYTHON_FOR_BUILD_DEPS) $(BUILDPYTHON) + @if [ -n "@MISSING_STDLIB_CONFIG@" ]; then \ + $(RUNSHARED) $(PYTHON_FOR_BUILD) $(srcdir)/Tools/build/check_extension_modules.py --generate-missing-stdlib-info --with-missing-stdlib-config="@MISSING_STDLIB_CONFIG@"; \ + else \ + $(RUNSHARED) $(PYTHON_FOR_BUILD) $(srcdir)/Tools/build/check_extension_modules.py --generate-missing-stdlib-info; \ + fi @$(RUNSHARED) $(PYTHON_FOR_BUILD) $(srcdir)/Tools/build/check_extension_modules.py .PHONY: rundsymutil @@ -2573,6 +2578,10 @@ LIBSUBDIRS= asyncio \ profile \ profiling profiling/sampling profiling/tracing \ profiling/sampling/_assets \ + profiling/sampling/_heatmap_assets \ + profiling/sampling/_flamegraph_assets \ + profiling/sampling/_shared_assets \ + profiling/sampling/live_collector \ profiling/sampling/_vendor/d3/7.8.5 \ profiling/sampling/_vendor/d3-flame-graph/4.1.3 \ pydoc_data \ @@ -2692,6 +2701,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_pathlib/support \ test/test_peg_generator \ test/test_profiling \ + test/test_profiling/test_sampling_profiler \ test/test_pydoc \ test/test_pyrepl \ test/test_string \ @@ -2818,6 +2828,7 @@ libinstall: all $(srcdir)/Modules/xxmodule.c $(INSTALL_DATA) `cat pybuilddir.txt`/_sysconfigdata_$(ABIFLAGS)_$(MACHDEP)_$(MULTIARCH).py $(DESTDIR)$(LIBDEST); \ $(INSTALL_DATA) `cat pybuilddir.txt`/_sysconfig_vars_$(ABIFLAGS)_$(MACHDEP)_$(MULTIARCH).json $(DESTDIR)$(LIBDEST); \ $(INSTALL_DATA) `cat pybuilddir.txt`/build-details.json $(DESTDIR)$(LIBDEST); \ + $(INSTALL_DATA) `cat pybuilddir.txt`/_missing_stdlib_info.py $(DESTDIR)$(LIBDEST); \ $(INSTALL_DATA) $(srcdir)/LICENSE $(DESTDIR)$(LIBDEST)/LICENSE.txt @ # If app store compliance has been configured, apply the patch to the @ # installed library code. The patch has been previously validated against @@ -3049,6 +3060,9 @@ frameworkinstallunversionedstructure: $(LDLIBRARY) $(INSTALL) -d -m $(DIRMODE) $(DESTDIR)$(PYTHONFRAMEWORKINSTALLDIR) sed 's/%VERSION%/'"`$(RUNSHARED) $(PYTHON_FOR_BUILD) -c 'import platform; print(platform.python_version())'`"'/g' < $(RESSRCDIR)/Info.plist > $(DESTDIR)$(PYTHONFRAMEWORKINSTALLDIR)/Info.plist $(INSTALL_SHARED) $(LDLIBRARY) $(DESTDIR)$(PYTHONFRAMEWORKPREFIX)/$(LDLIBRARY) + $(INSTALL) -d -m $(DIRMODE) $(DESTDIR)$(LIBDIR) + $(LN) -fs "../$(LDLIBRARY)" "$(DESTDIR)$(prefix)/lib/libpython$(LDVERSION).dylib" + $(LN) -fs "../$(LDLIBRARY)" "$(DESTDIR)$(prefix)/lib/libpython$(VERSION).dylib" $(INSTALL) -d -m $(DIRMODE) $(DESTDIR)$(BINDIR) for file in $(srcdir)/$(RESSRCDIR)/bin/* ; do \ $(INSTALL) -m $(EXEMODE) $$file $(DESTDIR)$(BINDIR); \ @@ -3308,6 +3322,11 @@ check-c-globals: --format summary \ --traceback +# Check for undocumented C APIs. 
+.PHONY: check-c-api-docs +check-c-api-docs: + $(PYTHON_FOR_REGEN) $(srcdir)/Tools/check-c-api-docs/main.py + # Find files with funny names .PHONY: funny funny: @@ -3380,6 +3399,7 @@ MODULE__DECIMAL_DEPS=@LIBMPDEC_INTERNAL@ MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@ MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h +MODULE__REMOTE_DEBUGGING_DEPS=$(srcdir)/Modules/_remote_debugging/_remote_debugging.h # HACL*-based cryptographic primitives MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_MD5_HEADERS) $(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@) diff --git a/Misc/ACKS b/Misc/ACKS index f5f15f2eb7e..e3927ff0b33 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -927,6 +927,7 @@ Kristján Valur Jónsson Jens B. Jorgensen John Jorgensen Sijin Joseph +Paresh Joshi Andreas Jung Tattoo Mabonzo K. Sarah K. @@ -2118,6 +2119,7 @@ Xiang Zhang Robert Xiao Florent Xicluna Yanbo, Xie +Kaisheng Xu Xinhang Xu Arnon Yaari Alakshendra Yadav diff --git a/Misc/NEWS.d/3.10.0a1.rst b/Misc/NEWS.d/3.10.0a1.rst index f09842f1e77..473e7c7ac0f 100644 --- a/Misc/NEWS.d/3.10.0a1.rst +++ b/Misc/NEWS.d/3.10.0a1.rst @@ -3275,8 +3275,8 @@ Types created with :c:func:`PyType_FromSpec` now make any signature in their .. nonce: u6Xfr2 .. section: C API -Fix bug in PyOS_mystrnicmp and PyOS_mystricmp that incremented pointers -beyond the end of a string. +Fix bug in :c:func:`PyOS_mystrnicmp` and :c:func:`PyOS_mystricmp` that +incremented pointers beyond the end of a string. .. diff --git a/Misc/NEWS.d/3.11.0a1.rst b/Misc/NEWS.d/3.11.0a1.rst index c7e57754ee3..10ef2968db2 100644 --- a/Misc/NEWS.d/3.11.0a1.rst +++ b/Misc/NEWS.d/3.11.0a1.rst @@ -4931,7 +4931,7 @@ Patch by Gabriele N. Tornetta .. nonce: 3p14JB .. section: C API -:c:func:`Py_RunMain` now resets :c:data:`!PyImport_Inittab` to its initial +:c:func:`Py_RunMain` now resets :c:data:`PyImport_Inittab` to its initial value at exit. It must be possible to call :c:func:`PyImport_AppendInittab` or :c:func:`PyImport_ExtendInittab` at each Python initialization. Patch by Victor Stinner. diff --git a/Misc/NEWS.d/3.12.0a2.rst b/Misc/NEWS.d/3.12.0a2.rst index bc028f30636..20e27c0d92f 100644 --- a/Misc/NEWS.d/3.12.0a2.rst +++ b/Misc/NEWS.d/3.12.0a2.rst @@ -35,11 +35,11 @@ Update bundled libexpat to 2.5.0 .. nonce: ik4iOv .. section: Core and Builtins -The docs clearly say that ``PyImport_Inittab``, +The docs clearly say that :c:data:`PyImport_Inittab`, :c:func:`PyImport_AppendInittab`, and :c:func:`PyImport_ExtendInittab` should not be used after :c:func:`Py_Initialize` has been called. We now enforce this for the two functions. Additionally, the runtime now uses an -internal copy of ``PyImport_Inittab``, to guard against modification. +internal copy of :c:data:`PyImport_Inittab`, to guard against modification. .. diff --git a/Misc/NEWS.d/3.15.0a2.rst b/Misc/NEWS.d/3.15.0a2.rst index ba82c854fac..4e3a62b0f4a 100644 --- a/Misc/NEWS.d/3.15.0a2.rst +++ b/Misc/NEWS.d/3.15.0a2.rst @@ -388,7 +388,7 @@ Add :func:`os.reload_environ` to ``os.__all__``. .. nonce: L13UCV .. section: Library -Fix :func:`profiling.sampling.sample` incorrectly handling a +Fix ``profiling.sampling.sample()`` incorrectly handling a :exc:`FileNotFoundError` or :exc:`PermissionError`. .. @@ -1451,7 +1451,7 @@ under no memory. .. section: Core and Builtins Update :class:`bytearray` to use a :class:`bytes` under the hood as its -buffer and add :func:`bytearray.take_bytes` to take it out. +buffer and add :meth:`bytearray.take_bytes` to take it out. 
..

diff --git a/Misc/NEWS.d/next/Build/2025-10-30-10-36-15.gh-issue-139707.QJ1FfJ.rst b/Misc/NEWS.d/next/Build/2025-10-30-10-36-15.gh-issue-139707.QJ1FfJ.rst
new file mode 100644
index 00000000000..d9870d26704
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-10-30-10-36-15.gh-issue-139707.QJ1FfJ.rst
@@ -0,0 +1,4 @@
+Add a configure option :option:`--with-missing-stdlib-config=FILE`, which
+allows distributors to pass a JSON configuration file containing
+custom error messages for missing :term:`standard library`
+modules.
diff --git a/Misc/NEWS.d/next/Build/2025-11-19-09-21-17.gh-issue-141172.cYWc4x.rst b/Misc/NEWS.d/next/Build/2025-11-19-09-21-17.gh-issue-141172.cYWc4x.rst
new file mode 100644
index 00000000000..7cade1ebd41
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-11-19-09-21-17.gh-issue-141172.cYWc4x.rst
@@ -0,0 +1 @@
+Update to WASI SDK 29.
diff --git a/Misc/NEWS.d/next/Build/2025-11-20-17-01-05.gh-issue-141784.LkYI2n.rst b/Misc/NEWS.d/next/Build/2025-11-20-17-01-05.gh-issue-141784.LkYI2n.rst
new file mode 100644
index 00000000000..f20d8409416
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-11-20-17-01-05.gh-issue-141784.LkYI2n.rst
@@ -0,0 +1,4 @@
+Fix ``_remote_debugging_module.c`` compilation on 32-bit Linux. Include
+Python.h before system headers to make sure that
+``_remote_debugging_module.c`` uses the same types (ABI) as Python. Patch
+by Victor Stinner.
diff --git a/Misc/NEWS.d/next/Build/2025-11-25-13-17-47.gh-issue-141926.KmuM2h.rst b/Misc/NEWS.d/next/Build/2025-11-25-13-17-47.gh-issue-141926.KmuM2h.rst
new file mode 100644
index 00000000000..dab79ba5cf9
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-11-25-13-17-47.gh-issue-141926.KmuM2h.rst
@@ -0,0 +1,4 @@
+``RUNSHARED`` is no longer cleared when cross-compiling. Previously,
+clearing it broke PGO when using ``--enable-shared`` on systems where the
+cross-compiled CPython is otherwise executable (e.g., via transparent
+emulation).
diff --git a/Misc/NEWS.d/next/Build/2025-11-28-19-49-01.gh-issue-141808.cV5K12.rst b/Misc/NEWS.d/next/Build/2025-11-28-19-49-01.gh-issue-141808.cV5K12.rst
new file mode 100644
index 00000000000..3162c7c4141
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-11-28-19-49-01.gh-issue-141808.cV5K12.rst
@@ -0,0 +1 @@
+Do not generate the JIT stencils twice for PGO builds on Windows.
diff --git a/Misc/NEWS.d/next/Build/2025-11-28-21-43-07.gh-issue-142050.PFi4tv.rst b/Misc/NEWS.d/next/Build/2025-11-28-21-43-07.gh-issue-142050.PFi4tv.rst
new file mode 100644
index 00000000000..8917d5df76e
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-11-28-21-43-07.gh-issue-142050.PFi4tv.rst
@@ -0,0 +1 @@
+Fixed a bug where JIT stencils produced on Windows contained debug data. Patch by Chris Eibl.
diff --git a/Misc/NEWS.d/next/Build/2025-12-03-10-44-42.gh-issue-142234.i1kaFb.rst b/Misc/NEWS.d/next/Build/2025-12-03-10-44-42.gh-issue-142234.i1kaFb.rst
new file mode 100644
index 00000000000..a586512fc0a
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2025-12-03-10-44-42.gh-issue-142234.i1kaFb.rst
@@ -0,0 +1,3 @@
+Allow ``--enable-wasm-dynamic-linking`` for WASI. While CPython doesn't
+directly support it, accepting the flag means external/downstream users do
+not have to patch in support for it.
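
The runtime half of the missing-stdlib-config machinery can be sketched as follows (the message table below is hypothetical and mirrors the test_traceback test in this patch; ``_missing_stdlib_info.py`` is normally generated at build time from the JSON config)::

    import sys
    import traceback

    # Hypothetical distributor-provided message table.
    traceback._MISSING_STDLIB_MODULE_MESSAGES = {
        "spam": "Install the 'spam4life' package to get 'spam'",
    }
    sys.stdlib_module_names |= {"spam"}   # pretend 'spam' is stdlib

    try:
        import spam
    except ModuleNotFoundError:
        # format_exc() now ends with the distributor message instead of
        # the generic "Standard library module 'spam' was not found".
        print(traceback.format_exc())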
diff --git a/Misc/NEWS.d/next/C_API/2025-09-22-16-32-00.gh-issue-139165.6Czn7S.rst b/Misc/NEWS.d/next/C_API/2025-09-22-16-32-00.gh-issue-139165.6Czn7S.rst
new file mode 100644
index 00000000000..039c25b8ce2
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-09-22-16-32-00.gh-issue-139165.6Czn7S.rst
@@ -0,0 +1,2 @@
+Expose the functions :c:func:`Py_SIZE`, :c:func:`Py_IS_TYPE` and
+:c:func:`Py_SET_SIZE` in the Stable ABI.
diff --git a/Misc/NEWS.d/next/C_API/2025-11-05-21-48-31.gh-issue-141070.mkrhjQ.rst b/Misc/NEWS.d/next/C_API/2025-11-05-21-48-31.gh-issue-141070.mkrhjQ.rst
new file mode 100644
index 00000000000..39cfcf73404
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-11-05-21-48-31.gh-issue-141070.mkrhjQ.rst
@@ -0,0 +1,2 @@
+Add :c:func:`PyUnstable_Object_Dump` to dump an object to ``stderr``. It should
+only be used for debugging. Patch by Victor Stinner.
diff --git a/Misc/NEWS.d/next/C_API/2025-11-18-04-16-09.gh-issue-140042.S1C7id.rst b/Misc/NEWS.d/next/C_API/2025-11-18-04-16-09.gh-issue-140042.S1C7id.rst
new file mode 100644
index 00000000000..608e806b431
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-11-18-04-16-09.gh-issue-140042.S1C7id.rst
@@ -0,0 +1 @@
+Removed the ``sqlite3_shutdown()`` call that could cause connections to be closed when SQLite is used with multiple subinterpreters.
diff --git a/Misc/NEWS.d/next/C_API/2025-11-18-18-36-15.gh-issue-141726.ILrhyK.rst b/Misc/NEWS.d/next/C_API/2025-11-18-18-36-15.gh-issue-141726.ILrhyK.rst
new file mode 100644
index 00000000000..3fdad5c6b3e
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-11-18-18-36-15.gh-issue-141726.ILrhyK.rst
@@ -0,0 +1 @@
+Add :c:func:`PyDict_SetDefaultRef` to the Stable ABI.
diff --git a/Misc/NEWS.d/next/C_API/2025-11-21-10-34-00.gh-issue-137422.tzZKLi.rst b/Misc/NEWS.d/next/C_API/2025-11-21-10-34-00.gh-issue-137422.tzZKLi.rst
new file mode 100644
index 00000000000..656289663cf
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-11-21-10-34-00.gh-issue-137422.tzZKLi.rst
@@ -0,0 +1,4 @@
+Fix a :term:`free threading` race condition in
+:c:func:`PyImport_AddModuleRef`. It was previously possible for two calls
+to the function to return two different objects, only one of which was
+stored in :data:`sys.modules`.
diff --git a/Misc/NEWS.d/next/C_API/2025-12-01-18-17-16.gh-issue-142163.2HiX5A.rst b/Misc/NEWS.d/next/C_API/2025-12-01-18-17-16.gh-issue-142163.2HiX5A.rst
new file mode 100644
index 00000000000..5edcfd81992
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-12-01-18-17-16.gh-issue-142163.2HiX5A.rst
@@ -0,0 +1,2 @@
+Fix the ``HAVE_THREAD_LOCAL`` macro being defined without the
+``Py_BUILD_CORE`` macro set after including :file:`Python.h`.
diff --git a/Misc/NEWS.d/next/C_API/2025-12-03-16-35-24.gh-issue-142225.vmCJoo.rst b/Misc/NEWS.d/next/C_API/2025-12-03-16-35-24.gh-issue-142225.vmCJoo.rst
new file mode 100644
index 00000000000..1eaf5b713d9
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2025-12-03-16-35-24.gh-issue-142225.vmCJoo.rst
@@ -0,0 +1 @@
+Fixed the :c:macro:`PyABIInfo_VAR` macro.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-10-00-14-20.gh-issue-116738.IxliC_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-10-00-14-20.gh-issue-116738.IxliC_.rst
new file mode 100644
index 00000000000..8b08bccafd7
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-10-00-14-20.gh-issue-116738.IxliC_.rst
@@ -0,0 +1,2 @@
+Make the csv module thread-safe on the :term:`free threaded <free threading>`
+build.
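
An illustrative workload for the csv thread-safety fix above (not a test from this patch; the point is that each ``next()`` on a shared reader is now internally synchronized on free-threaded builds)::

    import csv
    import io
    import threading

    reader = csv.reader(io.StringIO("a,b\nc,d\ne,f\n"))
    rows = []

    def drain():
        # Concurrent iteration over one shared reader is now safe.
        for row in reader:
            rows.append(row)

    threads = [threading.Thread(target=drain) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert sorted(rows) == [["a", "b"], ["c", "d"], ["e", "f"]]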
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-14-04-35.gh-issue-141589.VfdMDD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-14-04-35.gh-issue-141589.VfdMDD.rst
new file mode 100644
index 00000000000..5eb0e0c8b89
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-14-04-35.gh-issue-141589.VfdMDD.rst
@@ -0,0 +1,3 @@
+Change the ``backoff counter`` to use prime numbers instead of powers of 2.
+Use only 3 bits for ``counter`` and 13 bits for ``value``.
+This allows supporting values up to 8191. Patch by Mikhail Efimov.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-23-58-23.gh-issue-139103.9cVYJ0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-23-58-23.gh-issue-139103.9cVYJ0.rst
new file mode 100644
index 00000000000..c038dc742cc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-15-23-58-23.gh-issue-139103.9cVYJ0.rst
@@ -0,0 +1 @@
+Improve multithreaded scaling of dataclasses on the free-threaded build.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-16-21-14-48.gh-issue-41779.rXIj5h.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-16-21-14-48.gh-issue-41779.rXIj5h.rst
new file mode 100644
index 00000000000..8ba3ca8df86
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-16-21-14-48.gh-issue-41779.rXIj5h.rst
@@ -0,0 +1,2 @@
+Allowed defining the *__dict__* and *__weakref__* :ref:`__slots__ <slots>`
+for any class.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-17-14-40-45.gh-issue-139653.LzOy1M.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-17-14-40-45.gh-issue-139653.LzOy1M.rst
new file mode 100644
index 00000000000..c3ae0e8adab
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-17-14-40-45.gh-issue-139653.LzOy1M.rst
@@ -0,0 +1,4 @@
+Only raise a ``RecursionError`` or trigger a fatal error if the stack
+pointer is both below the limit pointer *and* above the stack base. If it is
+outside of these bounds, assume that it is OK. This prevents false positives
+when user-space threads swap stacks.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-18-07-45-37.gh-issue-140638.i06qxD.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-18-07-45-37.gh-issue-140638.i06qxD.rst
new file mode 100644
index 00000000000..891e24d7644
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-18-07-45-37.gh-issue-140638.i06qxD.rst
@@ -0,0 +1,2 @@
+Expose a ``"duration"`` stat in :func:`gc.get_stats` and
+:data:`gc.callbacks`.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-19-16-40-24.gh-issue-141732.PTetqp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-19-16-40-24.gh-issue-141732.PTetqp.rst
new file mode 100644
index 00000000000..08420fd5f4d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-19-16-40-24.gh-issue-141732.PTetqp.rst
@@ -0,0 +1,2 @@
+Ensure the :meth:`~object.__repr__` for :exc:`ExceptionGroup` and :exc:`BaseExceptionGroup` does
+not change when the exception sequence that was originally passed to its constructor is subsequently mutated.
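
A minimal sketch of the repr guarantee in the last entry above::

    excs = [ValueError(1)]
    eg = ExceptionGroup("boom", excs)
    before = repr(eg)
    excs.append(TypeError(2))      # mutate the original sequence
    assert repr(eg) == before      # the repr is now stable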
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-13-18-57.gh-issue-141780.xDrVNr.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-13-18-57.gh-issue-141780.xDrVNr.rst
new file mode 100644
index 00000000000..8700ac14824
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-13-18-57.gh-issue-141780.xDrVNr.rst
@@ -0,0 +1,2 @@
+Fix :c:macro:`Py_mod_gil` with the API added in :pep:`793`:
+:c:func:`!PyModule_FromSlotsAndSpec` and ``PyModExport`` hooks.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst
new file mode 100644
index 00000000000..e3af941523c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-20-22-09-22.gh-issue-140638.f6btj0.rst
@@ -0,0 +1,2 @@
+Expose a ``"candidates"`` stat in :func:`gc.get_stats` and
+:data:`gc.callbacks`.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-22-10-43-26.gh-issue-120158.41_rXd.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-22-10-43-26.gh-issue-120158.41_rXd.rst
new file mode 100644
index 00000000000..b3b5f252ac0
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-22-10-43-26.gh-issue-120158.41_rXd.rst
@@ -0,0 +1,2 @@
+Fix inconsistent state when enabling or disabling monitoring events too many
+times.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst
new file mode 100644
index 00000000000..a4a29e40027
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-16-07-57.gh-issue-138122.m3EF9E.rst
@@ -0,0 +1,6 @@
+Add incomplete sample detection to prevent corrupted profiling data. Each
+thread state now contains an embedded base frame (sentinel at the bottom of
+the frame stack) with owner type ``FRAME_OWNED_BY_INTERPRETER``. The profiler
+validates that stack unwinding terminates at this sentinel frame. Samples that
+fail to reach the base frame (due to race conditions, memory corruption, or
+other errors) are now rejected rather than being included as spurious data.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-21-09-30.gh-issue-141930.hIIzSd.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-21-09-30.gh-issue-141930.hIIzSd.rst
new file mode 100644
index 00000000000..06a12f98224
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-24-21-09-30.gh-issue-141930.hIIzSd.rst
@@ -0,0 +1,2 @@
+When importing a module, use Python's regular file object to ensure that
+writes to ``.pyc`` files are complete or an appropriate error is raised.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-02-23-31.gh-issue-141861.QcMdcM.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-02-23-31.gh-issue-141861.QcMdcM.rst
new file mode 100644
index 00000000000..4a115669998
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-02-23-31.gh-issue-141861.QcMdcM.rst
@@ -0,0 +1 @@
+Fix invalid memory read in the ``ENTER_EXECUTOR`` instruction.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
new file mode 100644
index 00000000000..151f8968292
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-25-13-13-34.gh-issue-116738.MnZRdV.rst
@@ -0,0 +1,2 @@
+Fix a thread safety issue with :mod:`re` scanner objects in free-threaded
+builds.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-01-10-03-08.gh-issue-116738.972YsG.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-01-10-03-08.gh-issue-116738.972YsG.rst
new file mode 100644
index 00000000000..d6d9d02b017
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-01-10-03-08.gh-issue-116738.972YsG.rst
@@ -0,0 +1,2 @@
+Fix a :mod:`cmath` data race when initializing trigonometric tables with
+subinterpreters.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-01-20-41-26.gh-issue-142048.c2YosX.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-01-20-41-26.gh-issue-142048.c2YosX.rst
new file mode 100644
index 00000000000..1400dae13ff
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-01-20-41-26.gh-issue-142048.c2YosX.rst
@@ -0,0 +1,2 @@
+Fix quadratically increasing garbage collection delays in the free-threaded
+build.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-02-21-11-46.gh-issue-141976.yu7pDV.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-02-21-11-46.gh-issue-141976.yu7pDV.rst
new file mode 100644
index 00000000000..f77315b7c37
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-02-21-11-46.gh-issue-141976.yu7pDV.rst
@@ -0,0 +1 @@
+Check against abstract stack overflow in the JIT optimizer.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-03-11-03-35.gh-issue-142218.44Fq_J.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-03-11-03-35.gh-issue-142218.44Fq_J.rst
new file mode 100644
index 00000000000..a8ce0fc6526
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-03-11-03-35.gh-issue-142218.44Fq_J.rst
@@ -0,0 +1,2 @@
+Fix crash when inserting into a split table dictionary with a
+non-:class:`str` key that matches an existing key.
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-16-43.gh-issue-142236.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-16-43.gh-issue-142236.m3EF9E.rst
new file mode 100644
index 00000000000..b5c6a27fd6a
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-16-43.gh-issue-142236.m3EF9E.rst
@@ -0,0 +1,4 @@
+Fix incorrect keyword suggestions for syntax errors in :mod:`traceback`. The
+keyword typo suggestion mechanism would incorrectly suggest replacements when
+the extracted source code was incomplete rather than containing an actual typo.
+Patch by Pablo Galindo.
diff --git a/Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst b/Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst
new file mode 100644
index 00000000000..c370e8a86e1
--- /dev/null
+++ b/Misc/NEWS.d/next/Documentation/2025-11-26-23-30-09.gh-issue-141994.arBEG6.rst
@@ -0,0 +1,4 @@
+:mod:`xml.sax.handler`: Make the documentation of
+:data:`xml.sax.handler.feature_external_ges` warn about opening
+applications up to external entity attacks.
+Patch by Sebastian Pipping.
diff --git a/Misc/NEWS.d/next/Library/2024-05-20-12-35-52.gh-issue-115952.J6n_Kf.rst b/Misc/NEWS.d/next/Library/2024-05-20-12-35-52.gh-issue-115952.J6n_Kf.rst
new file mode 100644
index 00000000000..4c4c65d45d7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-05-20-12-35-52.gh-issue-115952.J6n_Kf.rst
@@ -0,0 +1,7 @@
+Fix a potential memory denial of service in the :mod:`pickle` module.
+When reading pickled data received from an untrusted source, it could cause
+an arbitrary amount of memory to be allocated, even if the code that is
+allowed to execute is restricted by overriding the
+:meth:`~pickle.Unpickler.find_class` method.
+This could have led to symptoms including a :exc:`MemoryError`, swapping, out
+of memory (OOM) killed processes or containers, or even system crashes.
diff --git a/Misc/NEWS.d/next/Library/2025-05-30-18-37-44.gh-issue-134453.kxkA-o.rst b/Misc/NEWS.d/next/Library/2025-05-30-18-37-44.gh-issue-134453.kxkA-o.rst
new file mode 100644
index 00000000000..fee975ea702
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-05-30-18-37-44.gh-issue-134453.kxkA-o.rst
@@ -0,0 +1,4 @@
+Fixed :meth:`subprocess.Popen.communicate` ``input=`` handling of :class:`memoryview`
+instances that were non-byte-shaped on POSIX platforms. Those are now properly
+cast to a byte-shaped view instead of truncating the input. Windows platforms
+did not have this bug.
diff --git a/Misc/NEWS.d/next/Library/2025-09-09-10-13-24.gh-issue-138525.hDTaAM.rst b/Misc/NEWS.d/next/Library/2025-09-09-10-13-24.gh-issue-138525.hDTaAM.rst
new file mode 100644
index 00000000000..c4cea4b74b8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-09-10-13-24.gh-issue-138525.hDTaAM.rst
@@ -0,0 +1,2 @@
+Add support for single-dash long options and alternate prefix characters in
+:class:`argparse.BooleanOptionalAction`.
diff --git a/Misc/NEWS.d/next/Library/2025-09-09-13-00-42.gh-issue-138697.QVwJw_.rst b/Misc/NEWS.d/next/Library/2025-09-09-13-00-42.gh-issue-138697.QVwJw_.rst
new file mode 100644
index 00000000000..35aaa7cf70f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-09-09-13-00-42.gh-issue-138697.QVwJw_.rst
@@ -0,0 +1,4 @@
+Fix inferring *dest* from a single-dash long option in :mod:`argparse`. If a
+short option and a single-dash long option are passed to
+:meth:`!add_argument`, *dest* is now inferred from the single-dash long
+option.
diff --git a/Misc/NEWS.d/next/Library/2025-10-23-06-38-35.gh-issue-139946.HZa5hu.rst b/Misc/NEWS.d/next/Library/2025-10-23-06-38-35.gh-issue-139946.HZa5hu.rst
new file mode 100644
index 00000000000..fb479317284
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-23-06-38-35.gh-issue-139946.HZa5hu.rst
@@ -0,0 +1 @@
+Distinguish stdout and stderr when colorizing output in the :mod:`argparse` module.
diff --git a/Misc/NEWS.d/next/Library/2025-10-27-17-00-11.gh-issue-140677.hM9pTq.rst b/Misc/NEWS.d/next/Library/2025-10-27-17-00-11.gh-issue-140677.hM9pTq.rst
new file mode 100644
index 00000000000..2daa15e13ad
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-10-27-17-00-11.gh-issue-140677.hM9pTq.rst
@@ -0,0 +1,4 @@
+Add a heatmap visualization mode to the Tachyon sampling profiler. The new
+``--heatmap`` output format provides a line-by-line view showing execution
+intensity with color-coded samples, inline statistics, and interactive call
+graph navigation between callers and callees.
diff --git a/Misc/NEWS.d/next/Library/2025-11-02-10-44-23.gh-issue-140875.wt6B37.rst b/Misc/NEWS.d/next/Library/2025-11-02-10-44-23.gh-issue-140875.wt6B37.rst
new file mode 100644
index 00000000000..c08a8966d53
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-02-10-44-23.gh-issue-140875.wt6B37.rst
@@ -0,0 +1,3 @@
+Fix handling of unclosed character references (named and numerical)
+followed by the end of file in :class:`html.parser.HTMLParser` with
+``convert_charrefs=False``.
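
The non-byte-shaped memoryview handling described above boils down to a byte cast; a hedged sketch (this mirrors the described fix, it is not the patch code itself)::

    import array

    a = array.array("i", [1, 2, 3])     # itemsize is typically 4
    mv = memoryview(a)                  # len(mv) == 3, not byte-shaped
    byte_view = mv.cast("B")            # byte-shaped view of the same buffer
    assert len(byte_view) == mv.nbytes  # length now counts bytes, not items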
diff --git a/Misc/NEWS.d/next/Library/2025-11-03-17-13-00.gh-issue-140911.7KFvSQ.rst b/Misc/NEWS.d/next/Library/2025-11-03-17-13-00.gh-issue-140911.7KFvSQ.rst new file mode 100644 index 00000000000..b0b6e461192 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-03-17-13-00.gh-issue-140911.7KFvSQ.rst @@ -0,0 +1,3 @@ +:mod:`collections`: Ensure that the methods ``UserString.rindex()`` and +``UserString.index()`` accept :class:`collections.UserString` instances as the +sub argument. diff --git a/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst new file mode 100644 index 00000000000..c5103fb8005 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-13-13-11-02.gh-issue-60107.LZq3QF.rst @@ -0,0 +1,2 @@ +Remove a copy from :meth:`io.RawIOBase.read`. If the underlying I/O class +keeps a reference to the mutable memory, raise a :exc:`BufferError`. diff --git a/Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst b/Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst new file mode 100644 index 00000000000..628f1e0af03 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-14-18-00-41.gh-issue-141565.Ap2bhJ.rst @@ -0,0 +1 @@ +Add async-aware profiling to the Tachyon sampling profiler. The profiler now reconstructs and displays async task hierarchies in flamegraphs, making the output more actionable for users. Patch by Savannah Ostrowski and Pablo Galindo Salgado. diff --git a/Misc/NEWS.d/next/Library/2025-11-15-11-10-16.gh-issue-48752.aB3xYz.rst b/Misc/NEWS.d/next/Library/2025-11-15-11-10-16.gh-issue-48752.aB3xYz.rst new file mode 100644 index 00000000000..37b91196658 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-15-11-10-16.gh-issue-48752.aB3xYz.rst @@ -0,0 +1,3 @@ +Add :func:`readline.get_pre_input_hook` function to retrieve the current +pre-input hook. This allows applications to save and restore the hook +without overwriting user settings. Patch by Sanyam Khurana. diff --git a/Misc/NEWS.d/next/Library/2025-11-15-14-58-12.gh-issue-141600.XY2BXg.rst b/Misc/NEWS.d/next/Library/2025-11-15-14-58-12.gh-issue-141600.XY2BXg.rst new file mode 100644 index 00000000000..8071246f130 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-15-14-58-12.gh-issue-141600.XY2BXg.rst @@ -0,0 +1 @@ +Fix musl version detection on Void Linux. diff --git a/Misc/NEWS.d/next/Library/2025-11-16-04-40-06.gh-issue-69113.Xy7Fmn.rst b/Misc/NEWS.d/next/Library/2025-11-16-04-40-06.gh-issue-69113.Xy7Fmn.rst new file mode 100644 index 00000000000..cd76ae9b11e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-16-04-40-06.gh-issue-69113.Xy7Fmn.rst @@ -0,0 +1 @@ +Fix :mod:`doctest` to correctly report line numbers for doctests in ``__test__`` dictionary when formatted as triple-quoted strings by finding unique lines in the string and matching them in the source file. diff --git a/Misc/NEWS.d/next/Library/2025-11-16-06-08-46.gh-issue-141615.--6EK3.rst b/Misc/NEWS.d/next/Library/2025-11-16-06-08-46.gh-issue-141615.--6EK3.rst new file mode 100644 index 00000000000..bb54e683987 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-16-06-08-46.gh-issue-141615.--6EK3.rst @@ -0,0 +1 @@ +Check ``stdin`` instead of ``stdout`` for ``use_rawinput`` in :mod:`pdb`. 
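
The UserString change above (gh-140911) in practice, as a minimal sketch::

    from collections import UserString

    s = UserString("spam and eggs")
    needle = UserString("eggs")
    assert s.index(needle) == 9    # previously failed with TypeError
    assert s.rindex(needle) == 9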
diff --git a/Misc/NEWS.d/next/Library/2025-11-17-00-53-51.gh-issue-141645.TC3TL3.rst b/Misc/NEWS.d/next/Library/2025-11-17-00-53-51.gh-issue-141645.TC3TL3.rst
new file mode 100644
index 00000000000..d3f3bfddf6e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-17-00-53-51.gh-issue-141645.TC3TL3.rst
@@ -0,0 +1,4 @@
+Add a new ``--live`` mode to the tachyon profiler in the
+:mod:`!profiling.sampling` module. This mode consists of a live TUI that
+displays real-time profiling statistics as the target application runs,
+similar to ``top``. Patch by Pablo Galindo.
diff --git a/Misc/NEWS.d/next/Library/2025-11-17-08-16-30.gh-issue-141659.QNi9Aj.rst b/Misc/NEWS.d/next/Library/2025-11-17-08-16-30.gh-issue-141659.QNi9Aj.rst
new file mode 100644
index 00000000000..eeb055c6012
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-17-08-16-30.gh-issue-141659.QNi9Aj.rst
@@ -0,0 +1 @@
+Fix bad file descriptor errors from ``_posixsubprocess`` on AIX.
diff --git a/Misc/NEWS.d/next/Library/2025-11-17-16-53-49.gh-issue-141686.V-xaoI.rst b/Misc/NEWS.d/next/Library/2025-11-17-16-53-49.gh-issue-141686.V-xaoI.rst
new file mode 100644
index 00000000000..87e9cb8d69b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-17-16-53-49.gh-issue-141686.V-xaoI.rst
@@ -0,0 +1,2 @@
+Break reference cycles created by each call to :func:`json.dump` or
+:meth:`json.JSONEncoder.iterencode`.
diff --git a/Misc/NEWS.d/next/Library/2025-11-17-21-41-58.gh-issue-141679.fs7zLJ.rst b/Misc/NEWS.d/next/Library/2025-11-17-21-41-58.gh-issue-141679.fs7zLJ.rst
new file mode 100644
index 00000000000..b8203ca5a4a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-17-21-41-58.gh-issue-141679.fs7zLJ.rst
@@ -0,0 +1 @@
+Add colour to defaults in :mod:`argparse` help. Patch by Hugo van Kemenade.
diff --git a/Misc/NEWS.d/next/Library/2025-11-18-14-39-31.gh-issue-141570.q3n984.rst b/Misc/NEWS.d/next/Library/2025-11-18-14-39-31.gh-issue-141570.q3n984.rst
new file mode 100644
index 00000000000..8f4641ce4cf
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-18-14-39-31.gh-issue-141570.q3n984.rst
@@ -0,0 +1,2 @@
+Support a :term:`file-like object` raising :exc:`OSError` from :meth:`~io.IOBase.fileno` in color
+detection (``_colorize.can_colorize()``). This can occur when ``sys.stdout`` is redirected.
diff --git a/Misc/NEWS.d/next/Library/2025-11-18-15-48-13.gh-issue-105836.sbUw24.rst b/Misc/NEWS.d/next/Library/2025-11-18-15-48-13.gh-issue-105836.sbUw24.rst
new file mode 100644
index 00000000000..d2edc5b2cb7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-18-15-48-13.gh-issue-105836.sbUw24.rst
@@ -0,0 +1,2 @@
+Fix :func:`asyncio.run_coroutine_threadsafe` leaving the underlying cancelled
+asyncio task running.
diff --git a/Misc/NEWS.d/next/Library/2025-11-21-21-14-10.gh-issue-141817._v5LdB.rst b/Misc/NEWS.d/next/Library/2025-11-21-21-14-10.gh-issue-141817._v5LdB.rst
new file mode 100644
index 00000000000..774e13b2129
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-21-21-14-10.gh-issue-141817._v5LdB.rst
@@ -0,0 +1 @@
+Add the :data:`!socket.IPV6_HDRINCL` constant.
diff --git a/Misc/NEWS.d/next/Library/2025-11-22-16-33-48.gh-issue-141863.4PLhnv.rst b/Misc/NEWS.d/next/Library/2025-11-22-16-33-48.gh-issue-141863.4PLhnv.rst
new file mode 100644
index 00000000000..585774ba2cb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-22-16-33-48.gh-issue-141863.4PLhnv.rst
@@ -0,0 +1,2 @@
+Update :ref:`asyncio-streams` to use :meth:`bytearray.take_bytes` for an over
+10% performance improvement on the pyperformance asyncio_tcp benchmark.
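
A minimal sketch of the ``bytearray.take_bytes()`` pattern behind the streams speedup above, assuming the zero-argument form used elsewhere in this patch (e.g. in ``Lib/wave.py``)::

    buf = bytearray()
    buf += b"header"
    buf += b"payload"
    data = buf.take_bytes()    # moves the buffer out as bytes, without a copy
    assert data == b"headerpayload"
    # buf is expected to be left empty afterwards and can be reused.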
diff --git a/Misc/NEWS.d/next/Library/2025-11-24-06-44-45.gh-issue-141781.MsK27r.rst b/Misc/NEWS.d/next/Library/2025-11-24-06-44-45.gh-issue-141781.MsK27r.rst new file mode 100644 index 00000000000..21db97025a3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-24-06-44-45.gh-issue-141781.MsK27r.rst @@ -0,0 +1 @@ +Fixed an issue where pdb.line_prefix assignment was ignored if assigned after the module was imported. diff --git a/Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst b/Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst new file mode 100644 index 00000000000..5742beeb85c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-24-14-05-52.gh-issue-138122.2bbGA8.rst @@ -0,0 +1,5 @@ +The ``profiling.sampling`` flamegraph profiler now displays thread status +statistics showing the percentage of time threads spend holding the GIL, +running without the GIL, waiting for the GIL, and performing garbage +collection. These statistics help identify GIL contention and thread behavior +patterns. When filtering by thread, the display shows per-thread metrics. diff --git a/Misc/NEWS.d/next/Library/2025-11-25-16-00-29.gh-issue-59000.YtOyJy.rst b/Misc/NEWS.d/next/Library/2025-11-25-16-00-29.gh-issue-59000.YtOyJy.rst new file mode 100644 index 00000000000..33ab8a0659e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-25-16-00-29.gh-issue-59000.YtOyJy.rst @@ -0,0 +1 @@ +Fix :mod:`pdb` breakpoint resolution for class methods when the module defining the class is not imported. diff --git a/Misc/NEWS.d/next/Library/2025-11-25-22-54-07.gh-issue-141968.vg3AMJ.rst b/Misc/NEWS.d/next/Library/2025-11-25-22-54-07.gh-issue-141968.vg3AMJ.rst new file mode 100644 index 00000000000..4c89902813f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-25-22-54-07.gh-issue-141968.vg3AMJ.rst @@ -0,0 +1,2 @@ +Remove a data copy from :func:`base64.b32decode` and +:func:`base64.b32encode` by using :meth:`bytearray.take_bytes`. diff --git a/Misc/NEWS.d/next/Library/2025-11-25-23-22-46.gh-issue-141968.R1sHnJ.rst b/Misc/NEWS.d/next/Library/2025-11-25-23-22-46.gh-issue-141968.R1sHnJ.rst new file mode 100644 index 00000000000..eca0ac4c8e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-25-23-22-46.gh-issue-141968.R1sHnJ.rst @@ -0,0 +1,2 @@ +Remove data copy from :func:`wave.Wave_read.readframes` and +:func:`wave.Wave_write.writeframes` by using :meth:`bytearray.take_bytes`. diff --git a/Misc/NEWS.d/next/Library/2025-11-25-23-29-08.gh-issue-141968.0JnjXf.rst b/Misc/NEWS.d/next/Library/2025-11-25-23-29-08.gh-issue-141968.0JnjXf.rst new file mode 100644 index 00000000000..0cefeed30ed --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-25-23-29-08.gh-issue-141968.0JnjXf.rst @@ -0,0 +1,2 @@ +Remove data copy from :mod:`codecs` ``punycode`` encoding by using +:meth:`bytearray.take_bytes`. diff --git a/Misc/NEWS.d/next/Library/2025-11-25-23-35-07.gh-issue-141968.b3Gscp.rst b/Misc/NEWS.d/next/Library/2025-11-25-23-35-07.gh-issue-141968.b3Gscp.rst new file mode 100644 index 00000000000..50124e8b196 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-25-23-35-07.gh-issue-141968.b3Gscp.rst @@ -0,0 +1,2 @@ +Remove data copy from :mod:`encodings.idna` :meth:`~codecs.Codec.encode` and +:meth:`~codecs.IncrementalEncoder.encode` by using :meth:`bytearray.take_bytes`. 
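
The base32 entries above remove an internal copy only; observable behaviour is unchanged, as in this quick check::

    import base64

    raw = b"hello"
    enc = base64.b32encode(raw)
    assert enc == b"NBSWY3DP"
    assert base64.b32decode(enc) == raw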
diff --git a/Misc/NEWS.d/next/Library/2025-11-26-14-20-10.gh-issue-141968.W139Pv.rst b/Misc/NEWS.d/next/Library/2025-11-26-14-20-10.gh-issue-141968.W139Pv.rst
new file mode 100644
index 00000000000..c5375707814
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-26-14-20-10.gh-issue-141968.W139Pv.rst
@@ -0,0 +1,2 @@
+Remove a data copy from :mod:`re` compilation of regexes with large charsets
+by using :meth:`bytearray.take_bytes`.
diff --git a/Misc/NEWS.d/next/Library/2025-11-27-10-49-13.gh-issue-142006.nzJDG5.rst b/Misc/NEWS.d/next/Library/2025-11-27-10-49-13.gh-issue-142006.nzJDG5.rst
new file mode 100644
index 00000000000..49643892ff9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-27-10-49-13.gh-issue-142006.nzJDG5.rst
@@ -0,0 +1 @@
+Fix a bug in the :data:`email.policy.default` folding algorithm which incorrectly resulted in a doubled newline when a line ending at exactly ``max_line_length`` was followed by an unfoldable token.
diff --git a/Misc/NEWS.d/next/Library/2025-11-27-11-39-50.gh-issue-141999._FKGlu.rst b/Misc/NEWS.d/next/Library/2025-11-27-11-39-50.gh-issue-141999._FKGlu.rst
new file mode 100644
index 00000000000..3b54a831b54
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-27-11-39-50.gh-issue-141999._FKGlu.rst
@@ -0,0 +1,2 @@
+Correctly allow :exc:`KeyboardInterrupt` to stop the process when using
+:mod:`!profiling.sampling`.
diff --git a/Misc/NEWS.d/next/Library/2025-11-27-20-16-38.gh-issue-141473.Wq4xVN.rst b/Misc/NEWS.d/next/Library/2025-11-27-20-16-38.gh-issue-141473.Wq4xVN.rst
new file mode 100644
index 00000000000..f6aa592cefd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-27-20-16-38.gh-issue-141473.Wq4xVN.rst
@@ -0,0 +1,4 @@
+When :meth:`subprocess.Popen.communicate` is called with *input* and a
+*timeout*, and is then called a second time after a
+:exc:`~subprocess.TimeoutExpired` exception before the process has died, it
+no longer hangs.
diff --git a/Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst b/Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst
new file mode 100644
index 00000000000..091350108ee
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-29-03-02-45.gh-issue-87512.bn4xbm.rst
@@ -0,0 +1,5 @@
+Fix :meth:`subprocess.Popen.communicate` timeout handling on Windows
+when writing large input. Previously, the timeout was ignored during
+stdin writing, causing the method to block indefinitely if the child
+process did not consume input quickly. The stdin write is now performed
+in a background thread, allowing the timeout to be properly enforced.
diff --git a/Misc/NEWS.d/next/Library/2025-11-29-04-20-44.gh-issue-74389.pW3URj.rst b/Misc/NEWS.d/next/Library/2025-11-29-04-20-44.gh-issue-74389.pW3URj.rst
new file mode 100644
index 00000000000..a9bf5f80d80
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-29-04-20-44.gh-issue-74389.pW3URj.rst
@@ -0,0 +1,3 @@
+When the stdin being used by a :class:`subprocess.Popen` instance is closed,
+this is now ignored in :meth:`subprocess.Popen.communicate` instead of
+leaving the class in an inconsistent state.
diff --git a/Misc/NEWS.d/next/Library/2025-11-30-04-28-30.gh-issue-141982.pxZct9.rst b/Misc/NEWS.d/next/Library/2025-11-30-04-28-30.gh-issue-141982.pxZct9.rst
new file mode 100644
index 00000000000..e5ec593dd6e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-11-30-04-28-30.gh-issue-141982.pxZct9.rst
@@ -0,0 +1 @@
+Allow :mod:`pdb` to set breakpoints on async functions by function name.
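
The retry-after-timeout pattern that the communicate() fixes above make reliable, as a hedged sketch (the tiny timeout may or may not fire on a given machine; either path yields the full output)::

    import subprocess
    import sys

    p = subprocess.Popen(
        [sys.executable, "-c", "import sys; sys.stdout.write(sys.stdin.read())"],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True)
    try:
        out, _ = p.communicate(input="x" * 200_000, timeout=0.001)
    except subprocess.TimeoutExpired:
        # The second call now flushes the remaining input, closes stdin,
        # and waits for the child instead of hanging.
        out, _ = p.communicate()
    assert out == "x" * 200_000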
diff --git a/Misc/NEWS.d/next/Library/2025-12-01-14-43-58.gh-issue-138122.nRm3ic.rst b/Misc/NEWS.d/next/Library/2025-12-01-14-43-58.gh-issue-138122.nRm3ic.rst new file mode 100644 index 00000000000..e24fea416ff --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-01-14-43-58.gh-issue-138122.nRm3ic.rst @@ -0,0 +1,5 @@ +The ``_remote_debugging`` module now implements frame caching in the +``RemoteUnwinder`` class to reduce memory reads when profiling remote +processes. When ``cache_frames=True``, unchanged portions of the call stack +are reused from previous samples, significantly improving profiling +performance for deep call stacks. diff --git a/Misc/NEWS.d/next/Library/2025-12-03-06-12-39.gh-issue-142214.appYNZ.rst b/Misc/NEWS.d/next/Library/2025-12-03-06-12-39.gh-issue-142214.appYNZ.rst new file mode 100644 index 00000000000..b87430ec1a3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-03-06-12-39.gh-issue-142214.appYNZ.rst @@ -0,0 +1,12 @@ +Fix two regressions in :mod:`dataclasses` in Python 3.14.1 related to +annotations. + +* An exception is no longer raised if ``slots=True`` is used and the + ``__init__`` method does not have an ``__annotate__`` attribute + (likely because ``init=False`` was used). + +* An exception is no longer raised if annotations are requested on the + ``__init__`` method and one of the fields is not present in the class + annotations. This can occur in certain dynamic scenarios. + +Patch by Jelle Zijlstra. diff --git a/Misc/NEWS.d/next/Library/2025-12-03-09-36-29.gh-issue-142206.ilwegH.rst b/Misc/NEWS.d/next/Library/2025-12-03-09-36-29.gh-issue-142206.ilwegH.rst new file mode 100644 index 00000000000..90e4dd96985 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-03-09-36-29.gh-issue-142206.ilwegH.rst @@ -0,0 +1,4 @@ +The resource tracker in the :mod:`multiprocessing` module can now understand +messages from older versions of itself. This avoids issues with upgrading +Python while it is running. (Note that such 'in-place' upgrades are not +tested.) diff --git a/Misc/NEWS.d/next/Library/2025-12-04-09-22-31.gh-issue-68552.I_v-xB.rst b/Misc/NEWS.d/next/Library/2025-12-04-09-22-31.gh-issue-68552.I_v-xB.rst new file mode 100644 index 00000000000..bd3e53c9f81 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-04-09-22-31.gh-issue-68552.I_v-xB.rst @@ -0,0 +1 @@ +``MisplacedEnvelopeHeaderDefect`` and ``Missing header name`` defects are now correctly passed to the ``handle_defect`` method of ``policy`` in :class:`~email.parser.FeedParser`. diff --git a/Misc/NEWS.d/next/Library/2025-12-04-23-24-24.gh-issue-139862.NBfsD4.rst b/Misc/NEWS.d/next/Library/2025-12-04-23-24-24.gh-issue-139862.NBfsD4.rst new file mode 100644 index 00000000000..2bee8881a75 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-04-23-24-24.gh-issue-139862.NBfsD4.rst @@ -0,0 +1 @@ +Remove ``color`` parameter from :class:`!argparse.HelpFormatter` constructor. Color is controlled by :class:`~argparse.ArgumentParser`. diff --git a/Misc/NEWS.d/next/Library/2025-12-04-23-26-12.gh-issue-142267.yOM6fP.rst b/Misc/NEWS.d/next/Library/2025-12-04-23-26-12.gh-issue-142267.yOM6fP.rst new file mode 100644 index 00000000000..f46e82105fc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-12-04-23-26-12.gh-issue-142267.yOM6fP.rst @@ -0,0 +1 @@ +Improve :mod:`argparse` performance by caching the formatter used for argument validation. 
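
A plausible minimal reproducer for the first dataclasses regression listed above (a sketch, assuming ``init=False`` is the trigger as the entry suggests)::

    from dataclasses import dataclass

    # With init=False there is no generated __init__ carrying an
    # __annotate__ attribute; in 3.14.1 this raised during class creation.
    @dataclass(init=False, slots=True)
    class C:
        x: int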
diff --git a/Misc/NEWS.d/next/Library/2025-12-05-16-39-17.gh-issue-75949.pHxW98.rst b/Misc/NEWS.d/next/Library/2025-12-05-16-39-17.gh-issue-75949.pHxW98.rst
new file mode 100644
index 00000000000..5ca3fc05b98
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-05-16-39-17.gh-issue-75949.pHxW98.rst
@@ -0,0 +1 @@
+Fix :mod:`argparse` to preserve ``|`` separators in mutually exclusive groups when the usage line wraps due to length.
diff --git a/Misc/NEWS.d/next/Library/2025-12-05-18-25-29.gh-issue-142318.EzcQ3N.rst b/Misc/NEWS.d/next/Library/2025-12-05-18-25-29.gh-issue-142318.EzcQ3N.rst
new file mode 100644
index 00000000000..8710ebfb1a1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-05-18-25-29.gh-issue-142318.EzcQ3N.rst
@@ -0,0 +1,2 @@
+Fix a bug where typing ``'q'`` on the help screen of the interactive tachyon
+profiler exited the profiler entirely.
diff --git a/Misc/NEWS.d/next/Library/2025-12-06-13-02-13.gh-issue-142332.PNvXCV.rst b/Misc/NEWS.d/next/Library/2025-12-06-13-02-13.gh-issue-142332.PNvXCV.rst
new file mode 100644
index 00000000000..ee2d5e1d491
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-06-13-02-13.gh-issue-142332.PNvXCV.rst
@@ -0,0 +1,2 @@
+Fix usage formatting for positional arguments in mutually exclusive groups
+in :mod:`argparse`.
diff --git a/Misc/NEWS.d/next/Library/2025-12-06-13-19-43.gh-issue-142207.x_X9oH.rst b/Misc/NEWS.d/next/Library/2025-12-06-13-19-43.gh-issue-142207.x_X9oH.rst
new file mode 100644
index 00000000000..69ca8c41ac9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-06-13-19-43.gh-issue-142207.x_X9oH.rst
@@ -0,0 +1,2 @@
+Fix ``profiling.sampling`` triggering the assertion ``!(has_gil &&
+gil_requested)``.
diff --git a/Misc/NEWS.d/next/Library/2025-12-06-16-45-34.gh-issue-64532.4OXZpF.rst b/Misc/NEWS.d/next/Library/2025-12-06-16-45-34.gh-issue-64532.4OXZpF.rst
new file mode 100644
index 00000000000..3bd950050ae
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-06-16-45-34.gh-issue-64532.4OXZpF.rst
@@ -0,0 +1 @@
+Subparser help now includes required optional arguments from the parent parser in the usage, making it clearer what arguments are needed to run a subcommand. Patch by Savannah Ostrowski.
diff --git a/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst b/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst
new file mode 100644
index 00000000000..cf570f314c0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-12-07-17-30-05.gh-issue-142346.okcAAp.rst
@@ -0,0 +1,3 @@
+Fix usage formatting for mutually exclusive groups in :mod:`argparse`
+when they are preceded by positional arguments or followed by or intermixed
+with other optional arguments.
diff --git a/Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst b/Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst
new file mode 100644
index 00000000000..04fd8faca4c
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-05-21-22-11-31.gh-issue-119342.BTFj4Z.rst
@@ -0,0 +1,5 @@
+Fix a potential memory denial of service in the :mod:`plistlib` module.
+When reading a Plist file received from an untrusted source, it could cause
+an arbitrary amount of memory to be allocated.
+This could have led to symptoms including a :exc:`MemoryError`, swapping, out
+of memory (OOM) killed processes or containers, or even system crashes.
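
The ``find_class`` restriction mentioned by the pickle sibling of these hardening entries follows the documented pattern; a minimal sketch::

    import io
    import pickle

    class RestrictedUnpickler(pickle.Unpickler):
        def find_class(self, module, name):
            # Forbid all globals; the memory-DoS fix matters even under
            # restrictions like this one.
            raise pickle.UnpicklingError(
                f"global {module}.{name} is forbidden")

    def restricted_loads(data):
        return RestrictedUnpickler(io.BytesIO(data)).load()

    # Plain containers and scalars never hit find_class, so they load fine.
    assert restricted_loads(pickle.dumps([1, 2, 3])) == [1, 2, 3]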
diff --git a/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst new file mode 100644 index 00000000000..6d6f25cd2f8 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-05-23-11-47-48.gh-issue-119451.qkJe9-.rst @@ -0,0 +1,5 @@ +Fix a potential memory denial of service in the :mod:`http.client` module. +When connecting to a malicious server, it could cause +an arbitrary amount of memory to be allocated. +This could have led to symptoms including a :exc:`MemoryError`, swapping, out +of memory (OOM) killed processes or containers, or even system crashes. diff --git a/Misc/NEWS.d/next/Security/2025-11-13-22-31-56.gh-issue-42400.pqB5Kq.rst b/Misc/NEWS.d/next/Security/2025-11-13-22-31-56.gh-issue-42400.pqB5Kq.rst new file mode 100644 index 00000000000..17dc241aef9 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-11-13-22-31-56.gh-issue-42400.pqB5Kq.rst @@ -0,0 +1,3 @@ +Fix buffer overflow in ``_Py_wrealpath()`` for paths exceeding ``MAXPATHLEN`` bytes +by using dynamic memory allocation instead of a fixed-size buffer. +Patch by Shamil Abdulaev. diff --git a/Misc/NEWS.d/next/Security/2025-12-01-09-36-45.gh-issue-142145.tcAUhg.rst b/Misc/NEWS.d/next/Security/2025-12-01-09-36-45.gh-issue-142145.tcAUhg.rst new file mode 100644 index 00000000000..440bc7794c6 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-12-01-09-36-45.gh-issue-142145.tcAUhg.rst @@ -0,0 +1 @@ +Remove quadratic behavior in ``xml.minidom`` node ID cache clearing. diff --git a/Misc/NEWS.d/next/Tests/2025-10-16-15-08-58.gh-issue-140210.P9vUP8.rst b/Misc/NEWS.d/next/Tests/2025-10-16-15-08-58.gh-issue-140210.P9vUP8.rst new file mode 100644 index 00000000000..f2064bfd377 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-10-16-15-08-58.gh-issue-140210.P9vUP8.rst @@ -0,0 +1,2 @@ +Make ``test_sysconfig.test_parse_makefile_renamed_vars`` less fragile by +clearing the environment variables before parsing the Makefile. diff --git a/Misc/NEWS.d/next/Tests/2025-10-27-15-53-47.gh-issue-140381.N5o3pa.rst b/Misc/NEWS.d/next/Tests/2025-10-27-15-53-47.gh-issue-140381.N5o3pa.rst new file mode 100644 index 00000000000..568a2b65d7d --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-10-27-15-53-47.gh-issue-140381.N5o3pa.rst @@ -0,0 +1 @@ +Fix flaky ``test_profiling`` tests on i686 and s390x architectures by increasing the ``slow_fibonacci`` call frequency from every 5th iteration to every 2nd iteration. diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-11-18-13-55-47.gh-issue-141692.tud9if.rst b/Misc/NEWS.d/next/Tools-Demos/2025-11-18-13-55-47.gh-issue-141692.tud9if.rst new file mode 100644 index 00000000000..d85c54db364 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-11-18-13-55-47.gh-issue-141692.tud9if.rst @@ -0,0 +1,3 @@ +Each slice of an iOS XCframework now contains a ``lib`` folder with a +symlink to the libpython dylib. This allows binary modules to be compiled +for iOS using dynamic library linking, rather than Framework linking.
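The :mod:`plistlib` and :mod:`http.client` fixes above share a mitigation with the ``MIN_READ_BUF_SIZE`` change to ``Modules/_pickle.c`` later in this diff: never trust a declared length enough to allocate it all at once. A minimal Python sketch of the pattern; names are illustrative and this is not CPython's implementation.

    # Grow the buffer geometrically instead of allocating the declared size
    # up front, so a stream that lies about its length fails early with
    # bounded memory use.
    MIN_READ_BUF_SIZE = 1 << 20  # mirrors the 1 MiB constant added to _pickle.c

    def read_declared(stream, declared_size):
        if declared_size <= MIN_READ_BUF_SIZE:
            data = stream.read(declared_size)
            if len(data) < declared_size:
                raise EOFError("stream ended before the declared size")
            return data
        buf = bytearray()
        target = MIN_READ_BUF_SIZE
        while True:
            chunk = stream.read(target - len(buf))
            if not chunk:
                raise EOFError("stream ended before the declared size")
            buf += chunk
            if len(buf) == declared_size:
                return bytes(buf)
            if len(buf) == target:
                # Double the committed size, capped at the declared size.
                target += min(target, declared_size - target)

A 100-byte stream with a forged multi-gigabyte length prefix now fails after reading those 100 bytes, instead of attempting a multi-gigabyte allocation first.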
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 7ee6cf1dae5..31d22e64b84 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -1902,6 +1902,13 @@ added = '3.5' [data.PyModuleDef_Type] added = '3.5' +[const.Py_mod_create] + added = '3.5' +[const.Py_mod_exec] + added = '3.5' +[struct.PyModuleDef_Slot] + added = '3.5' + struct_abi_kind = 'full-abi' # New slots in 3.5: # d51374ed78a3e3145911a16cdf3b9b84b3ba7d15 - Matrix multiplication (PEP 465) @@ -2444,6 +2451,9 @@ added = '3.12' [const.Py_TPFLAGS_ITEMS_AT_END] added = '3.12' +[const.Py_mod_multiple_interpreters] + added = '3.12' + [function.PyImport_AddModuleRef] added = '3.13' [function.PyWeakref_GetRef] @@ -2522,10 +2532,14 @@ added = '3.13' [function.PyEval_GetFrameLocals] added = '3.13' +[const.Py_mod_gil] + added = '3.13' [function.Py_TYPE] + # Before 3.14, this was a macro that accessed the PyObject member added = '3.14' [function.Py_REFCNT] + # Before 3.14, this was a macro that accessed the PyObject member added = '3.14' [function.PyIter_NextItem] added = '3.14' @@ -2639,3 +2653,14 @@ added = '3.15' [const.Py_mod_token] added = '3.15' +[function.PyDict_SetDefaultRef] + added = '3.15' +[function.Py_SIZE] + # Before 3.15, this was a macro that accessed the PyObject member + added = '3.15' +[function.Py_IS_TYPE] + # Before 3.15, this was a macro that accessed the PyObject member + added = '3.15' +[function.Py_SET_SIZE] + # Before 3.15, this was a macro that accessed the PyObject member + added = '3.15' diff --git a/Modules/Setup b/Modules/Setup index 8a54c0aaec6..7d816ead843 100644 --- a/Modules/Setup +++ b/Modules/Setup @@ -285,7 +285,7 @@ PYTHONPATH=$(COREPYTHONPATH) #*shared* #_ctypes_test _ctypes/_ctypes_test.c -#_remote_debugging _remote_debugging_module.c +#_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/threads.c _remote_debugging/asyncio.c #_testcapi _testcapimodule.c #_testimportmultiple _testimportmultiple.c #_testmultiphase _testmultiphase.c diff --git a/Modules/Setup.stdlib.in b/Modules/Setup.stdlib.in index 2c3013e3d0c..1be83b45526 100644 --- a/Modules/Setup.stdlib.in +++ b/Modules/Setup.stdlib.in @@ -41,7 +41,7 @@ @MODULE__PICKLE_TRUE@_pickle _pickle.c @MODULE__QUEUE_TRUE@_queue _queuemodule.c @MODULE__RANDOM_TRUE@_random _randommodule.c -@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging_module.c +@MODULE__REMOTE_DEBUGGING_TRUE@_remote_debugging _remote_debugging/module.c _remote_debugging/object_reading.c _remote_debugging/code_objects.c _remote_debugging/frames.c _remote_debugging/frame_cache.c _remote_debugging/threads.c _remote_debugging/asyncio.c @MODULE__STRUCT_TRUE@_struct _struct.c # build supports subinterpreters diff --git a/Modules/_csv.c b/Modules/_csv.c index 87be7a8f1fb..1f41976e95f 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -918,7 +918,7 @@ parse_reset(ReaderObj *self) } static PyObject * -Reader_iternext(PyObject *op) +Reader_iternext_lock_held(PyObject *op) { ReaderObj *self = _ReaderObj_CAST(op); @@ -985,6 +985,16 @@ Reader_iternext(PyObject *op) return fields; } +static PyObject * +Reader_iternext(PyObject *op) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = Reader_iternext_lock_held(op); + Py_END_CRITICAL_SECTION(); + return result; +} + static void Reader_dealloc(PyObject *op) { @@ -1303,15 +1313,8 @@ join_append_lineterminator(WriterObj *self) return 1; } -PyDoc_STRVAR(csv_writerow_doc, -"writerow($self, row, /)\n" 
-"--\n\n" -"Construct and write a CSV record from an iterable of fields.\n" -"\n" -"Non-string elements will be converted to string."); - static PyObject * -csv_writerow(PyObject *op, PyObject *seq) +csv_writerow_lock_held(PyObject *op, PyObject *seq) { WriterObj *self = _WriterObj_CAST(op); DialectObj *dialect = self->dialect; @@ -1414,6 +1417,23 @@ csv_writerow(PyObject *op, PyObject *seq) return result; } +PyDoc_STRVAR(csv_writerow_doc, +"writerow($self, row, /)\n" +"--\n\n" +"Construct and write a CSV record from an iterable of fields.\n" +"\n" +"Non-string elements will be converted to string."); + +static PyObject * +csv_writerow(PyObject *op, PyObject *seq) +{ + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(op); + result = csv_writerow_lock_held(op, seq); + Py_END_CRITICAL_SECTION(); + return result; +} + PyDoc_STRVAR(csv_writerows_doc, "writerows($self, rows, /)\n" "--\n\n" diff --git a/Modules/_io/iobase.c b/Modules/_io/iobase.c index f1c2fe17801..f036ea503b1 100644 --- a/Modules/_io/iobase.c +++ b/Modules/_io/iobase.c @@ -927,33 +927,33 @@ _io__RawIOBase_read_impl(PyObject *self, Py_ssize_t n) return PyObject_CallMethodNoArgs(self, &_Py_ID(readall)); } - /* TODO: allocate a bytes object directly instead and manually construct - a writable memoryview pointing to it. */ b = PyByteArray_FromStringAndSize(NULL, n); - if (b == NULL) + if (b == NULL) { return NULL; + } res = PyObject_CallMethodObjArgs(self, &_Py_ID(readinto), b, NULL); if (res == NULL || res == Py_None) { - Py_DECREF(b); - return res; + goto cleanup; } Py_ssize_t bytes_filled = PyNumber_AsSsize_t(res, PyExc_ValueError); - Py_DECREF(res); + Py_CLEAR(res); if (bytes_filled == -1 && PyErr_Occurred()) { - Py_DECREF(b); - return NULL; + goto cleanup; } if (bytes_filled < 0 || bytes_filled > n) { - Py_DECREF(b); PyErr_Format(PyExc_ValueError, "readinto returned %zd outside buffer size %zd", bytes_filled, n); - return NULL; + goto cleanup; } + if (PyByteArray_Resize(b, bytes_filled) < 0) { + goto cleanup; + } + res = PyObject_CallMethodNoArgs(b, &_Py_ID(take_bytes)); - res = PyBytes_FromStringAndSize(PyByteArray_AsString(b), bytes_filled); +cleanup: Py_DECREF(b); return res; } diff --git a/Modules/_pickle.c b/Modules/_pickle.c index bfb2830f389..608598eb5a5 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -155,6 +155,9 @@ enum { /* Prefetch size when unpickling (disabled on unpeekable streams) */ PREFETCH = 8192 * 16, + /* Data larger that this will be read in chunks, to prevent extreme + overallocation. */ + MIN_READ_BUF_SIZE = 1 << 20, FRAME_SIZE_MIN = 4, FRAME_SIZE_TARGET = 64 * 1024, @@ -647,10 +650,11 @@ typedef struct UnpicklerObject { Pdata *stack; /* Pickle data stack, store unpickled objects. */ /* The unpickler memo is just an array of PyObject *s. Using a dict - is unnecessary, since the keys are contiguous ints. */ + is unnecessary, since the keys usually are contiguous ints. */ PyObject **memo; size_t memo_size; /* Capacity of the memo array */ size_t memo_len; /* Number of objects in the memo */ + PyObject *memo_dict; /* The backup memo dict for non-continuous keys. */ PyObject *persistent_load; /* persistent_load() method, can be NULL. */ PyObject *persistent_load_attr; /* instance attribute, can be NULL. */ @@ -1247,141 +1251,13 @@ _Unpickler_SkipConsumed(UnpicklerObject *self) static const Py_ssize_t READ_WHOLE_LINE = -1; -/* If reading from a file, we need to only pull the bytes we need, since there - may be multiple pickle objects arranged contiguously in the same input - buffer. 
- - If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n` - bytes from the input stream/buffer. - - Update the unpickler's input buffer with the newly-read data. Returns -1 on - failure; on success, returns the number of bytes read from the file. - - On success, self->input_len will be 0; this is intentional so that when - unpickling from a file, the "we've run out of data" code paths will trigger, - causing the Unpickler to go back to the file for more data. Use the returned - size to tell you how much data you can process. */ +/* Don't call it directly: use _Unpickler_ReadInto() */ static Py_ssize_t -_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n) -{ - PyObject *data; - Py_ssize_t read_size; - - assert(self->read != NULL); - - if (_Unpickler_SkipConsumed(self) < 0) - return -1; - - if (n == READ_WHOLE_LINE) { - data = PyObject_CallNoArgs(self->readline); - } - else { - PyObject *len; - /* Prefetch some data without advancing the file pointer, if possible */ - if (self->peek && n < PREFETCH) { - len = PyLong_FromSsize_t(PREFETCH); - if (len == NULL) - return -1; - data = _Pickle_FastCall(self->peek, len); - if (data == NULL) { - if (!PyErr_ExceptionMatches(PyExc_NotImplementedError)) - return -1; - /* peek() is probably not supported by the given file object */ - PyErr_Clear(); - Py_CLEAR(self->peek); - } - else { - read_size = _Unpickler_SetStringInput(self, data); - Py_DECREF(data); - if (read_size < 0) { - return -1; - } - - self->prefetched_idx = 0; - if (n <= read_size) - return n; - } - } - len = PyLong_FromSsize_t(n); - if (len == NULL) - return -1; - data = _Pickle_FastCall(self->read, len); - } - if (data == NULL) - return -1; - - read_size = _Unpickler_SetStringInput(self, data); - Py_DECREF(data); - return read_size; -} - -/* Don't call it directly: use _Unpickler_Read() */ -static Py_ssize_t -_Unpickler_ReadImpl(UnpicklerObject *self, PickleState *st, char **s, Py_ssize_t n) -{ - Py_ssize_t num_read; - - *s = NULL; - if (self->next_read_idx > PY_SSIZE_T_MAX - n) { - PyErr_SetString(st->UnpicklingError, - "read would overflow (invalid bytecode)"); - return -1; - } - - /* This case is handled by the _Unpickler_Read() macro for efficiency */ - assert(self->next_read_idx + n > self->input_len); - - if (!self->read) - return bad_readline(st); - - /* Extend the buffer to satisfy desired size */ - num_read = _Unpickler_ReadFromFile(self, n); - if (num_read < 0) - return -1; - if (num_read < n) - return bad_readline(st); - *s = self->input_buffer; - self->next_read_idx = n; - return n; -} - -/* Read `n` bytes from the unpickler's data source, storing the result in `buf`. - * - * This should only be used for non-small data reads where potentially - * avoiding a copy is beneficial. This method does not try to prefetch - * more data into the input buffer. - * - * _Unpickler_Read() is recommended in most cases. 
- */ -static Py_ssize_t -_Unpickler_ReadInto(PickleState *state, UnpicklerObject *self, char *buf, - Py_ssize_t n) +_Unpickler_ReadIntoFromFile(PickleState *state, UnpicklerObject *self, char *buf, + Py_ssize_t n) { assert(n != READ_WHOLE_LINE); - /* Read from available buffer data, if any */ - Py_ssize_t in_buffer = self->input_len - self->next_read_idx; - if (in_buffer > 0) { - Py_ssize_t to_read = Py_MIN(in_buffer, n); - memcpy(buf, self->input_buffer + self->next_read_idx, to_read); - self->next_read_idx += to_read; - buf += to_read; - n -= to_read; - if (n == 0) { - /* Entire read was satisfied from buffer */ - return n; - } - } - - /* Read from file */ - if (!self->read) { - /* We're unpickling memory, this means the input is truncated */ - return bad_readline(state); - } - if (_Unpickler_SkipConsumed(self) < 0) { - return -1; - } - if (!self->readinto) { /* readinto() not supported on file-like object, fall back to read() * and copy into destination buffer (bpo-39681) */ @@ -1435,6 +1311,163 @@ _Unpickler_ReadInto(PickleState *state, UnpicklerObject *self, char *buf, return n; } +/* If reading from a file, we need to only pull the bytes we need, since there + may be multiple pickle objects arranged contiguously in the same input + buffer. + + If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n` + bytes from the input stream/buffer. + + Update the unpickler's input buffer with the newly-read data. Returns -1 on + failure; on success, returns the number of bytes read from the file. + + On success, self->input_len will be 0; this is intentional so that when + unpickling from a file, the "we've run out of data" code paths will trigger, + causing the Unpickler to go back to the file for more data. Use the returned + size to tell you how much data you can process. 
*/ +static Py_ssize_t +_Unpickler_ReadFromFile(PickleState *state, UnpicklerObject *self, Py_ssize_t n) +{ + PyObject *data; + Py_ssize_t read_size; + + assert(self->read != NULL); + + if (_Unpickler_SkipConsumed(self) < 0) + return -1; + + if (n == READ_WHOLE_LINE) { + data = PyObject_CallNoArgs(self->readline); + if (data == NULL) { + return -1; + } + } + else { + PyObject *len; + /* Prefetch some data without advancing the file pointer, if possible */ + if (self->peek && n < PREFETCH) { + len = PyLong_FromSsize_t(PREFETCH); + if (len == NULL) + return -1; + data = _Pickle_FastCall(self->peek, len); + if (data == NULL) { + if (!PyErr_ExceptionMatches(PyExc_NotImplementedError)) + return -1; + /* peek() is probably not supported by the given file object */ + PyErr_Clear(); + Py_CLEAR(self->peek); + } + else { + read_size = _Unpickler_SetStringInput(self, data); + Py_DECREF(data); + if (read_size < 0) { + return -1; + } + + self->prefetched_idx = 0; + if (n <= read_size) + return n; + } + } + Py_ssize_t cursize = Py_MIN(n, MIN_READ_BUF_SIZE); + len = PyLong_FromSsize_t(cursize); + if (len == NULL) + return -1; + data = _Pickle_FastCall(self->read, len); + if (data == NULL) { + return -1; + } + while (cursize < n) { + Py_ssize_t prevsize = cursize; + // geometrically double the chunk size to avoid CPU DoS + cursize += Py_MIN(cursize, n - cursize); + if (_PyBytes_Resize(&data, cursize) < 0) { + return -1; + } + if (_Unpickler_ReadIntoFromFile(state, self, + PyBytes_AS_STRING(data) + prevsize, cursize - prevsize) < 0) + { + Py_DECREF(data); + return -1; + } + } + } + + read_size = _Unpickler_SetStringInput(self, data); + Py_DECREF(data); + return read_size; +} + +/* Don't call it directly: use _Unpickler_Read() */ +static Py_ssize_t +_Unpickler_ReadImpl(UnpicklerObject *self, PickleState *st, char **s, Py_ssize_t n) +{ + Py_ssize_t num_read; + + *s = NULL; + if (self->next_read_idx > PY_SSIZE_T_MAX - n) { + PyErr_SetString(st->UnpicklingError, + "read would overflow (invalid bytecode)"); + return -1; + } + + /* This case is handled by the _Unpickler_Read() macro for efficiency */ + assert(self->next_read_idx + n > self->input_len); + + if (!self->read) + return bad_readline(st); + + /* Extend the buffer to satisfy desired size */ + num_read = _Unpickler_ReadFromFile(st, self, n); + if (num_read < 0) + return -1; + if (num_read < n) + return bad_readline(st); + *s = self->input_buffer; + self->next_read_idx = n; + return n; +} + +/* Read `n` bytes from the unpickler's data source, storing the result in `buf`. + * + * This should only be used for non-small data reads where potentially + * avoiding a copy is beneficial. This method does not try to prefetch + * more data into the input buffer. + * + * _Unpickler_Read() is recommended in most cases. 
+ */ +static Py_ssize_t +_Unpickler_ReadInto(PickleState *state, UnpicklerObject *self, char *buf, + Py_ssize_t n) +{ + assert(n != READ_WHOLE_LINE); + + /* Read from available buffer data, if any */ + Py_ssize_t in_buffer = self->input_len - self->next_read_idx; + if (in_buffer > 0) { + Py_ssize_t to_read = Py_MIN(in_buffer, n); + memcpy(buf, self->input_buffer + self->next_read_idx, to_read); + self->next_read_idx += to_read; + buf += to_read; + n -= to_read; + if (n == 0) { + /* Entire read was satisfied from buffer */ + return n; + } + } + + /* Read from file */ + if (!self->read) { + /* We're unpickling memory, this means the input is truncated */ + return bad_readline(state); + } + if (_Unpickler_SkipConsumed(self) < 0) { + return -1; + } + + return _Unpickler_ReadIntoFromFile(state, self, buf, n); +} + /* Read `n` bytes from the unpickler's data source, storing the result in `*s`. This should be used for all data reads, rather than accessing the unpickler's @@ -1492,7 +1525,7 @@ _Unpickler_Readline(PickleState *state, UnpicklerObject *self, char **result) if (!self->read) return bad_readline(state); - num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE); + num_read = _Unpickler_ReadFromFile(state, self, READ_WHOLE_LINE); if (num_read < 0) return -1; if (num_read == 0 || self->input_buffer[num_read - 1] != '\n') @@ -1525,12 +1558,35 @@ _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size) /* Returns NULL if idx is out of bounds. */ static PyObject * -_Unpickler_MemoGet(UnpicklerObject *self, size_t idx) +_Unpickler_MemoGet(PickleState *st, UnpicklerObject *self, size_t idx) { - if (idx >= self->memo_size) - return NULL; - - return self->memo[idx]; + PyObject *value; + if (idx < self->memo_size) { + value = self->memo[idx]; + if (value != NULL) { + return value; + } + } + if (self->memo_dict != NULL) { + PyObject *key = PyLong_FromSize_t(idx); + if (key == NULL) { + return NULL; + } + if (idx < self->memo_size) { + (void)PyDict_Pop(self->memo_dict, key, &value); + // Migrate dict entry to array for faster future access + self->memo[idx] = value; + } + else { + value = PyDict_GetItemWithError(self->memo_dict, key); + } + Py_DECREF(key); + if (value != NULL || PyErr_Occurred()) { + return value; + } + } + PyErr_Format(st->UnpicklingError, "Memo value not found at index %zd", idx); + return NULL; } /* Returns -1 (with an exception set) on failure, 0 on success. @@ -1541,6 +1597,27 @@ _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value) PyObject *old_item; if (idx >= self->memo_size) { + if (idx > self->memo_len * 2) { + /* The memo keys are too sparse. Use a dict instead of + * a continuous array for the memo. 
*/ + if (self->memo_dict == NULL) { + self->memo_dict = PyDict_New(); + if (self->memo_dict == NULL) { + return -1; + } + } + PyObject *key = PyLong_FromSize_t(idx); + if (key == NULL) { + return -1; + } + + if (PyDict_SetItem(self->memo_dict, key, value) < 0) { + Py_DECREF(key); + return -1; + } + Py_DECREF(key); + return 0; + } if (_Unpickler_ResizeMemoList(self, idx * 2) < 0) return -1; assert(idx < self->memo_size); @@ -1610,6 +1687,7 @@ _Unpickler_New(PyObject *module) self->memo = memo; self->memo_size = MEMO_SIZE; self->memo_len = 0; + self->memo_dict = NULL; self->persistent_load = NULL; self->persistent_load_attr = NULL; memset(&self->buffer, 0, sizeof(Py_buffer)); @@ -5582,13 +5660,28 @@ load_counted_binbytes(PickleState *state, UnpicklerObject *self, int nbytes) return -1; } - bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) - return -1; - if (_Unpickler_ReadInto(state, self, PyBytes_AS_STRING(bytes), size) < 0) { - Py_DECREF(bytes); + Py_ssize_t cursize = Py_MIN(size, MIN_READ_BUF_SIZE); + Py_ssize_t prevsize = 0; + bytes = PyBytes_FromStringAndSize(NULL, cursize); + if (bytes == NULL) { return -1; } + while (1) { + if (_Unpickler_ReadInto(state, self, + PyBytes_AS_STRING(bytes) + prevsize, cursize - prevsize) < 0) + { + Py_DECREF(bytes); + return -1; + } + if (cursize >= size) { + break; + } + prevsize = cursize; + cursize += Py_MIN(cursize, size - cursize); + if (_PyBytes_Resize(&bytes, cursize) < 0) { + return -1; + } + } PDATA_PUSH(self->stack, bytes, -1); return 0; @@ -5613,14 +5706,27 @@ load_counted_bytearray(PickleState *state, UnpicklerObject *self) return -1; } - bytearray = PyByteArray_FromStringAndSize(NULL, size); + Py_ssize_t cursize = Py_MIN(size, MIN_READ_BUF_SIZE); + Py_ssize_t prevsize = 0; + bytearray = PyByteArray_FromStringAndSize(NULL, cursize); if (bytearray == NULL) { return -1; } - char *str = PyByteArray_AS_STRING(bytearray); - if (_Unpickler_ReadInto(state, self, str, size) < 0) { - Py_DECREF(bytearray); - return -1; + while (1) { + if (_Unpickler_ReadInto(state, self, + PyByteArray_AS_STRING(bytearray) + prevsize, + cursize - prevsize) < 0) { + Py_DECREF(bytearray); + return -1; + } + if (cursize >= size) { + break; + } + prevsize = cursize; + cursize += Py_MIN(cursize, size - cursize); + if (PyByteArray_Resize(bytearray, cursize) < 0) { + return -1; + } } PDATA_PUSH(self->stack, bytearray, -1); @@ -6222,20 +6328,15 @@ load_get(PickleState *st, UnpicklerObject *self) if (key == NULL) return -1; idx = PyLong_AsSsize_t(key); + Py_DECREF(key); if (idx == -1 && PyErr_Occurred()) { - Py_DECREF(key); return -1; } - value = _Unpickler_MemoGet(self, idx); + value = _Unpickler_MemoGet(st, self, idx); if (value == NULL) { - if (!PyErr_Occurred()) { - PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx); - } - Py_DECREF(key); return -1; } - Py_DECREF(key); PDATA_APPEND(self->stack, value, -1); return 0; @@ -6253,13 +6354,8 @@ load_binget(PickleState *st, UnpicklerObject *self) idx = Py_CHARMASK(s[0]); - value = _Unpickler_MemoGet(self, idx); + value = _Unpickler_MemoGet(st, self, idx); if (value == NULL) { - PyObject *key = PyLong_FromSsize_t(idx); - if (key != NULL) { - PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx); - Py_DECREF(key); - } return -1; } @@ -6279,13 +6375,8 @@ load_long_binget(PickleState *st, UnpicklerObject *self) idx = calc_binsize(s, 4); - value = _Unpickler_MemoGet(self, idx); + value = _Unpickler_MemoGet(st, self, idx); if (value == NULL) { - PyObject *key = 
PyLong_FromSsize_t(idx); - if (key != NULL) { - PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx); - Py_DECREF(key); - } return -1; } @@ -7250,6 +7341,7 @@ Unpickler_clear(PyObject *op) self->buffer.buf = NULL; } + Py_CLEAR(self->memo_dict); _Unpickler_MemoCleanup(self); PyMem_Free(self->marks); self->marks = NULL; @@ -7286,6 +7378,7 @@ Unpickler_traverse(PyObject *op, visitproc visit, void *arg) Py_VISIT(self->persistent_load); Py_VISIT(self->persistent_load_attr); Py_VISIT(self->buffers); + Py_VISIT(self->memo_dict); PyObject **memo = self->memo; if (memo) { Py_ssize_t i = self->memo_size; diff --git a/Modules/_posixsubprocess.c b/Modules/_posixsubprocess.c index 0219a3360fd..6f0a6d1d4e3 100644 --- a/Modules/_posixsubprocess.c +++ b/Modules/_posixsubprocess.c @@ -514,7 +514,13 @@ _close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep, proc_fd_dir = NULL; else #endif +#if defined(_AIX) + char fd_path[PATH_MAX]; + snprintf(fd_path, sizeof(fd_path), "/proc/%ld/fd", (long)getpid()); + proc_fd_dir = opendir(fd_path); +#else proc_fd_dir = opendir(FD_DIR); +#endif if (!proc_fd_dir) { /* No way to get a list of open fds. */ _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len, diff --git a/Modules/_remote_debugging/_remote_debugging.h b/Modules/_remote_debugging/_remote_debugging.h new file mode 100644 index 00000000000..7f3c0d363f5 --- /dev/null +++ b/Modules/_remote_debugging/_remote_debugging.h @@ -0,0 +1,574 @@ +/****************************************************************************** + * Python Remote Debugging Module - Shared Header + * + * This header provides common declarations, types, and utilities shared + * across the remote debugging module implementation files. + ******************************************************************************/ + +#ifndef Py_REMOTE_DEBUGGING_H +#define Py_REMOTE_DEBUGGING_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define _GNU_SOURCE + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif + +#include "Python.h" +#include // _Py_DebugOffsets +#include // FRAME_SUSPENDED_YIELD_FROM +#include // FRAME_OWNED_BY_INTERPRETER +#include // struct llist_node +#include // _PyLong_GetZero +#include // Py_TAG_BITS +#include "../../Python/remote_debug.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef HAVE_PROCESS_VM_READV +# define HAVE_PROCESS_VM_READV 0 +#endif + +#if defined(__APPLE__) && TARGET_OS_OSX +#include +#include +#define MAX_NATIVE_THREADS 4096 +#endif + +#ifdef MS_WINDOWS +#include +#include +#define STATUS_SUCCESS ((NTSTATUS)0x00000000L) +#define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L) +typedef enum _WIN32_THREADSTATE { + WIN32_THREADSTATE_INITIALIZED = 0, + WIN32_THREADSTATE_READY = 1, + WIN32_THREADSTATE_RUNNING = 2, + WIN32_THREADSTATE_STANDBY = 3, + WIN32_THREADSTATE_TERMINATED = 4, + WIN32_THREADSTATE_WAITING = 5, + WIN32_THREADSTATE_TRANSITION = 6, + WIN32_THREADSTATE_UNKNOWN = 7 +} WIN32_THREADSTATE; +#endif + +/* ============================================================================ + * MACROS AND CONSTANTS + * ============================================================================ */ + +#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) +#define CLEAR_PTR_TAG(ptr) (((uintptr_t)(ptr) & ~Py_TAG_BITS)) +#define GET_MEMBER_NO_TAG(type, obj, offset) (type)(CLEAR_PTR_TAG(*(type*)((char*)(obj) + (offset)))) + +/* Size macros for opaque buffers */ +#define SIZEOF_BYTES_OBJ 
sizeof(PyBytesObject) +#define SIZEOF_CODE_OBJ sizeof(PyCodeObject) +#define SIZEOF_GEN_OBJ sizeof(PyGenObject) +#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame) +#define SIZEOF_LLIST_NODE sizeof(struct llist_node) +#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t) +#define SIZEOF_PYOBJECT sizeof(PyObject) +#define SIZEOF_SET_OBJ sizeof(PySetObject) +#define SIZEOF_TASK_OBJ 4096 +#define SIZEOF_THREAD_STATE sizeof(PyThreadState) +#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) +#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) +#define SIZEOF_LONG_OBJ sizeof(PyLongObject) +#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state) +#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState) + +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifdef Py_GIL_DISABLED +#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \ + offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *)) +#else +#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \ + offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *)) +#endif +#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) + +#define MAX_TLBC_SIZE 2048 + +/* Thread status flags */ +#define THREAD_STATUS_HAS_GIL (1 << 0) +#define THREAD_STATUS_ON_CPU (1 << 1) +#define THREAD_STATUS_UNKNOWN (1 << 2) +#define THREAD_STATUS_GIL_REQUESTED (1 << 3) + +/* Exception cause macro */ +#define set_exception_cause(unwinder, exc_type, message) \ + if (unwinder->debug) { \ + _set_debug_exception_cause(exc_type, message); \ + } + +/* ============================================================================ + * TYPE DEFINITIONS + * ============================================================================ */ + +struct _Py_AsyncioModuleDebugOffsets { + struct _asyncio_task_object { + uint64_t size; + uint64_t task_name; + uint64_t task_awaited_by; + uint64_t task_is_task; + uint64_t task_awaited_by_is_set; + uint64_t task_coro; + uint64_t task_node; + } asyncio_task_object; + struct _asyncio_interpreter_state { + uint64_t size; + uint64_t asyncio_tasks_head; + } asyncio_interpreter_state; + struct _asyncio_thread_state { + uint64_t size; + uint64_t asyncio_running_loop; + uint64_t asyncio_running_task; + uint64_t asyncio_tasks_head; + } asyncio_thread_state; +}; + +typedef struct { + PyObject *func_name; + PyObject *file_name; + int first_lineno; + PyObject *linetable; // bytes + uintptr_t addr_code_adaptive; +} CachedCodeMetadata; + +/* Frame cache constants and types */ +#define FRAME_CACHE_MAX_THREADS 32 +#define FRAME_CACHE_MAX_FRAMES 1024 + +typedef struct { + uint64_t thread_id; // 0 = empty slot + uintptr_t addrs[FRAME_CACHE_MAX_FRAMES]; + Py_ssize_t num_addrs; + PyObject *frame_list; // owned reference, NULL if empty +} FrameCacheEntry; + +/* Statistics for profiling performance analysis */ +typedef struct { + uint64_t total_samples; // Total number of get_stack_trace calls + uint64_t frame_cache_hits; // Full cache hits (entire stack unchanged) + uint64_t frame_cache_misses; // Cache misses 
requiring full walk + uint64_t frame_cache_partial_hits; // Partial hits (stopped at cached frame) + uint64_t frames_read_from_cache; // Total frames retrieved from cache + uint64_t frames_read_from_memory; // Total frames read from remote memory + uint64_t memory_reads; // Total remote memory read operations + uint64_t memory_bytes_read; // Total bytes read from remote memory + uint64_t code_object_cache_hits; // Code object cache hits + uint64_t code_object_cache_misses; // Code object cache misses + uint64_t stale_cache_invalidations; // Times stale entries were cleared +} UnwinderStats; + +/* Stats tracking macros - no-op when stats collection is disabled */ +#define STATS_INC(unwinder, field) \ + do { if ((unwinder)->collect_stats) (unwinder)->stats.field++; } while(0) + +#define STATS_ADD(unwinder, field, val) \ + do { if ((unwinder)->collect_stats) (unwinder)->stats.field += (val); } while(0) + +typedef struct { + PyTypeObject *RemoteDebugging_Type; + PyTypeObject *TaskInfo_Type; + PyTypeObject *FrameInfo_Type; + PyTypeObject *CoroInfo_Type; + PyTypeObject *ThreadInfo_Type; + PyTypeObject *InterpreterInfo_Type; + PyTypeObject *AwaitedInfo_Type; +} RemoteDebuggingState; + +enum _ThreadState { + THREAD_STATE_RUNNING, + THREAD_STATE_IDLE, + THREAD_STATE_GIL_WAIT, + THREAD_STATE_UNKNOWN +}; + +enum _ProfilingMode { + PROFILING_MODE_WALL = 0, + PROFILING_MODE_CPU = 1, + PROFILING_MODE_GIL = 2, + PROFILING_MODE_ALL = 3 +}; + +typedef struct { + PyObject_HEAD + proc_handle_t handle; + uintptr_t runtime_start_address; + struct _Py_DebugOffsets debug_offsets; + int async_debug_offsets_available; + struct _Py_AsyncioModuleDebugOffsets async_debug_offsets; + uintptr_t interpreter_addr; + uintptr_t tstate_addr; + uint64_t code_object_generation; + _Py_hashtable_t *code_object_cache; + int debug; + int only_active_thread; + int mode; + int skip_non_matching_threads; + int native; + int gc; + int cache_frames; + int collect_stats; // whether to collect statistics + uint32_t stale_invalidation_counter; // counter for throttling frame_cache_invalidate_stale + RemoteDebuggingState *cached_state; + FrameCacheEntry *frame_cache; // preallocated array of FRAME_CACHE_MAX_THREADS entries + UnwinderStats stats; // statistics for performance analysis +#ifdef Py_GIL_DISABLED + uint32_t tlbc_generation; + _Py_hashtable_t *tlbc_cache; +#endif +#ifdef __APPLE__ + uint64_t thread_id_offset; +#endif +#ifdef MS_WINDOWS + PVOID win_process_buffer; + ULONG win_process_buffer_size; +#endif +} RemoteUnwinderObject; + +#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op)) + +typedef struct { + int lineno; + int end_lineno; + int column; + int end_column; +} LocationInfo; + +typedef struct { + uintptr_t remote_addr; + size_t size; + void *local_copy; +} StackChunkInfo; + +typedef struct { + StackChunkInfo *chunks; + size_t count; +} StackChunkList; + +/* Function pointer types for iteration callbacks */ +typedef int (*thread_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +); + +typedef int (*set_entry_processor_func)( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +); + +/* ============================================================================ + * STRUCTSEQ DESCRIPTORS (extern declarations) + * ============================================================================ */ + +extern PyStructSequence_Desc TaskInfo_desc; +extern PyStructSequence_Desc FrameInfo_desc; +extern PyStructSequence_Desc CoroInfo_desc; 
+extern PyStructSequence_Desc ThreadInfo_desc; +extern PyStructSequence_Desc InterpreterInfo_desc; +extern PyStructSequence_Desc AwaitedInfo_desc; + +/* ============================================================================ + * UTILITY FUNCTION DECLARATIONS + * ============================================================================ */ + +/* State access functions */ +extern RemoteDebuggingState *RemoteDebugging_GetState(PyObject *module); +extern RemoteDebuggingState *RemoteDebugging_GetStateFromType(PyTypeObject *type); +extern RemoteDebuggingState *RemoteDebugging_GetStateFromObject(PyObject *obj); +extern int RemoteDebugging_InitState(RemoteDebuggingState *st); + +/* Cache management */ +extern void cached_code_metadata_destroy(void *ptr); + +/* Validation */ +extern int is_prerelease_version(uint64_t version); +extern int validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets); + +/* ============================================================================ + * MEMORY READING FUNCTION DECLARATIONS + * ============================================================================ */ + +extern int read_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *result); +extern int read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *result); +extern int read_char(RemoteUnwinderObject *unwinder, uintptr_t address, char *result); +extern int read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr); + +/* Python object reading */ +extern PyObject *read_py_str(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t max_len); +extern PyObject *read_py_bytes(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t max_len); +extern long read_py_long(RemoteUnwinderObject *unwinder, uintptr_t address); + +/* ============================================================================ + * CODE OBJECT FUNCTION DECLARATIONS + * ============================================================================ */ + +extern int parse_code_object( + RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t instruction_pointer, + uintptr_t *previous_frame, + int32_t tlbc_index +); + +extern PyObject *make_frame_info( + RemoteUnwinderObject *unwinder, + PyObject *file, + PyObject *line, + PyObject *func +); + +/* Line table parsing */ +extern bool parse_linetable( + const uintptr_t addrq, + const char* linetable, + int firstlineno, + LocationInfo* info +); + +/* TLBC cache (only for Py_GIL_DISABLED) */ +#ifdef Py_GIL_DISABLED +typedef struct { + void *tlbc_array; + Py_ssize_t tlbc_array_size; + uint32_t generation; +} TLBCCacheEntry; + +extern void tlbc_cache_entry_destroy(void *ptr); +extern TLBCCacheEntry *get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation); +extern int cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation); +#endif + +/* ============================================================================ + * FRAME FUNCTION DECLARATIONS + * ============================================================================ */ + +extern int is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr +); + +extern int parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* address_of_code_object, + uintptr_t* previous_frame +); + +extern int parse_frame_from_chunks( + RemoteUnwinderObject 
*unwinder, + PyObject **result, + uintptr_t address, + uintptr_t *previous_frame, + uintptr_t *stackpointer, + StackChunkList *chunks +); + +/* Stack chunk management */ +extern void cleanup_stack_chunks(StackChunkList *chunks); +extern int copy_stack_chunks(RemoteUnwinderObject *unwinder, uintptr_t tstate_addr, StackChunkList *out_chunks); +extern void *find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr); + +extern int process_frame_chain( + RemoteUnwinderObject *unwinder, + uintptr_t initial_frame_addr, + StackChunkList *chunks, + PyObject *frame_info, + uintptr_t base_frame_addr, + uintptr_t gc_frame, + uintptr_t last_profiled_frame, + int *stopped_at_cached_frame, + uintptr_t *frame_addrs, + Py_ssize_t *num_addrs, + Py_ssize_t max_addrs +); + +/* Frame cache functions */ +extern int frame_cache_init(RemoteUnwinderObject *unwinder); +extern void frame_cache_cleanup(RemoteUnwinderObject *unwinder); +extern FrameCacheEntry *frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id); +extern int clear_last_profiled_frames(RemoteUnwinderObject *unwinder); +extern void frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result); +extern int frame_cache_lookup_and_extend( + RemoteUnwinderObject *unwinder, + uint64_t thread_id, + uintptr_t last_profiled_frame, + PyObject *frame_info, + uintptr_t *frame_addrs, + Py_ssize_t *num_addrs, + Py_ssize_t max_addrs); +// Returns: 1 = stored, 0 = not stored (graceful), -1 = error +extern int frame_cache_store( + RemoteUnwinderObject *unwinder, + uint64_t thread_id, + PyObject *frame_list, + const uintptr_t *addrs, + Py_ssize_t num_addrs); + +extern int collect_frames_with_cache( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + StackChunkList *chunks, + PyObject *frame_info, + uintptr_t gc_frame, + uintptr_t last_profiled_frame, + uint64_t thread_id); + +/* ============================================================================ + * THREAD FUNCTION DECLARATIONS + * ============================================================================ */ + +extern int iterate_threads( + RemoteUnwinderObject *unwinder, + thread_processor_func processor, + void *context +); + +extern int populate_initial_state_data( + int all_threads, + RemoteUnwinderObject *unwinder, + uintptr_t runtime_start_address, + uintptr_t *interpreter_state, + uintptr_t *tstate +); + +extern int find_running_frame( + RemoteUnwinderObject *unwinder, + uintptr_t address_of_thread, + uintptr_t *frame +); + +extern int get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id); + +extern PyObject* unwind_stack_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t *current_tstate, + uintptr_t gil_holder_tstate, + uintptr_t gc_frame +); + +/* ============================================================================ + * ASYNCIO FUNCTION DECLARATIONS + * ============================================================================ */ + +extern uintptr_t _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle); +extern int read_async_debug(RemoteUnwinderObject *unwinder); +extern int ensure_async_debug_offsets(RemoteUnwinderObject *unwinder); + +/* Task parsing */ +extern PyObject *parse_task_name(RemoteUnwinderObject *unwinder, uintptr_t task_address); + +extern int parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to +); + +extern int parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +); + +extern int 
parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +); + +/* Set iteration */ +extern int iterate_set_entries( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + set_entry_processor_func processor, + void *context +); + +/* Task awaited_by processing */ +extern int process_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + set_entry_processor_func processor, + void *context +); + +extern int process_single_task_node( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject **task_info, + PyObject *result +); + +extern int process_task_and_waiters( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +); + +extern int find_running_task_in_thread( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + uintptr_t *running_task_addr +); + +extern int get_task_code_object( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + uintptr_t *code_obj_addr +); + +extern int append_awaited_by( + RemoteUnwinderObject *unwinder, + unsigned long tid, + uintptr_t head_addr, + PyObject *result +); + +/* Thread processors for asyncio */ +extern int process_thread_for_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +); + +extern int process_thread_for_async_stack_trace( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +); + +#ifdef __cplusplus +} +#endif + +#endif /* Py_REMOTE_DEBUGGING_H */ diff --git a/Modules/_remote_debugging/asyncio.c b/Modules/_remote_debugging/asyncio.c new file mode 100644 index 00000000000..7f91f16e3a2 --- /dev/null +++ b/Modules/_remote_debugging/asyncio.c @@ -0,0 +1,1014 @@ +/****************************************************************************** + * Remote Debugging Module - Asyncio Functions + * + * This file contains functions for parsing asyncio tasks, coroutines, + * and awaited_by relationships from remote process memory. 
+ ******************************************************************************/ + +#include "_remote_debugging.h" + +/* ============================================================================ + * ASYNCIO DEBUG ADDRESS FUNCTIONS + * ============================================================================ */ + +uintptr_t +_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +{ + uintptr_t address; + +#ifdef MS_WINDOWS + // On Windows, search the _asyncio DLL for the AsyncioDebug section + // (PE section names are truncated to 8 characters: "AsyncioD") + address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); + if (address == 0) { + // Error out: the section was not found in the _asyncio DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__linux__) && HAVE_PROCESS_VM_READV + // On Linux, search for asyncio debug in the executable or shared library + address = search_linux_map_for_section(handle, "AsyncioDebug", "python"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__APPLE__) && TARGET_OS_OSX + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "AsyncioDebug", "libpython"); + if (address == 0) { + PyErr_Clear(); + address = search_map_for_section(handle, "AsyncioDebug", "python"); + } + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#else + Py_UNREACHABLE(); +#endif + + return address; +} + +int +read_async_debug(RemoteUnwinderObject *unwinder) +{ + uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); + if (!async_debug_addr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to get AsyncioDebug address"); + return -1; + } + + size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); + if (result < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read AsyncioDebug offsets"); + } + return result; + } + +int +ensure_async_debug_offsets(RemoteUnwinderObject *unwinder) +{ + // If already available, nothing to do + if (unwinder->async_debug_offsets_available) { + return 0; + } + + // Try to load async debug offsets (the target process may have + // loaded asyncio since we last checked) + if (read_async_debug(unwinder) < 0) { + PyErr_Clear(); + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + set_exception_cause(unwinder, PyExc_RuntimeError, + "AsyncioDebug section unavailable - asyncio module may not be loaded in target process"); + return -1; + } + + unwinder->async_debug_offsets_available = 1; + return 0; +} + +/* ============================================================================ + * SET ITERATION FUNCTIONS + * ============================================================================ */ + +int +iterate_set_entries( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + set_entry_processor_func processor, + void *context +) { + char set_object[SIZEOF_SET_OBJ];
+ if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, set_addr, + SIZEOF_SET_OBJ, set_object) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); + return -1; + } + + Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); + Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; + uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + + Py_ssize_t i = 0; + Py_ssize_t els = 0; + while (i < set_len && els < num_els) { + uintptr_t key_addr; + if (read_py_ptr(unwinder, table_ptr, &key_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); + return -1; + } + + if ((void*)key_addr != NULL) { + Py_ssize_t ref_cnt; + if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry ref count"); + return -1; + } + + if (ref_cnt) { + // Process this valid set entry + if (processor(unwinder, key_addr, context) < 0) { + return -1; + } + els++; + } + } + table_ptr += sizeof(void*) * 2; + i++; + } + + return 0; +} + +/* ============================================================================ + * TASK NAME PARSING + * ============================================================================ */ + +PyObject * +parse_task_name( + RemoteUnwinderObject *unwinder, + uintptr_t task_address +) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_address, + (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); + return NULL; + } + + uintptr_t task_name_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); + + // The task name can be a long or a string so we need to check the type + char task_name_obj[SIZEOF_PYOBJECT]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_name_addr, + SIZEOF_PYOBJECT, + task_name_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name object"); + return NULL; + } + + // Now read the type object to get the flags + char type_obj[SIZEOF_TYPE_OBJ]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type), + SIZEOF_TYPE_OBJ, + type_obj); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name type object"); + return NULL; + } + + if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { + long res = read_py_long(unwinder, task_name_addr); + if (res == -1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed"); + return NULL; + } + // res is a long, so the matching length modifier is required + return PyUnicode_FromFormat("Task-%ld", res); + } + + if (!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { + PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task name object is neither long nor unicode"); + return NULL; + } + + return read_py_str( + unwinder, + task_name_addr, + 255 + ); +} + +/* ============================================================================
* COROUTINE CHAIN PARSING + * ============================================================================ */ + +static int +handle_yield_from_frame( + RemoteUnwinderObject *unwinder, + uintptr_t gi_iframe_addr, + uintptr_t gen_type_addr, + PyObject *render_to +) { + // Read the entire interpreter frame at once + char iframe[SIZEOF_INTERP_FRAME]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + gi_iframe_addr, + SIZEOF_INTERP_FRAME, + iframe); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame in yield_from handler"); + return -1; + } + + if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) { + PyErr_SetString( + PyExc_RuntimeError, + "generator doesn't own its frame \\_o_/"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame ownership mismatch in yield_from"); + return -1; + } + + uintptr_t stackpointer_addr = GET_MEMBER_NO_TAG(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); + + if ((void*)stackpointer_addr != NULL) { + uintptr_t gi_await_addr; + err = read_py_ptr( + unwinder, + stackpointer_addr - sizeof(void*), + &gi_await_addr); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await address"); + return -1; + } + + if ((void*)gi_await_addr != NULL) { + uintptr_t gi_await_addr_type_addr; + err = read_ptr( + unwinder, + gi_await_addr + (uintptr_t)unwinder->debug_offsets.pyobject.ob_type, + &gi_await_addr_type_addr); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await type address"); + return -1; + } + + if (gen_type_addr == gi_await_addr_type_addr) { + /* This needs an explanation. We always start with parsing + native coroutine / generator frames. Ultimately they + are awaiting on something. That something can be + a native coroutine frame or... an iterator. + If it's the latter -- we can't continue building + our chain. So the condition to bail out of this is + to do that when the type of the current coroutine + doesn't match the type of whatever it points to + in its cr_await. 
+ */ + err = parse_coro_chain(unwinder, gi_await_addr, render_to); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain in yield_from"); + return -1; + } + } + } + } + + return 0; +} + +int +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +) { + assert((void*)coro_address != NULL); + + // Read the entire generator object at once + char gen_object[SIZEOF_GEN_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + coro_address, + SIZEOF_GEN_OBJ, + gen_object); + if (err < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read generator object in coro chain"); + return -1; + } + + int8_t frame_state = GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state); + if (frame_state == FRAME_CLEARED) { + return 0; + } + + uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); + + PyObject* name = NULL; + + // Parse the previous frame using the gi_iframe from local copy + uintptr_t prev_frame; + uintptr_t gi_iframe_addr = coro_address + (uintptr_t)unwinder->debug_offsets.gen_object.gi_iframe; + uintptr_t address_of_code_object = 0; + if (parse_frame_object(unwinder, &name, gi_iframe_addr, &address_of_code_object, &prev_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in coro chain"); + return -1; + } + + if (!name) { + return 0; + } + + if (PyList_Append(render_to, name)) { + Py_DECREF(name); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to coro chain"); + return -1; + } + Py_DECREF(name); + + if (frame_state == FRAME_SUSPENDED_YIELD_FROM) { + return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); + } + + return 0; +} + +/* ============================================================================ + * TASK PARSING FUNCTIONS + * ============================================================================ */ + +static PyObject* +create_task_result( + RemoteUnwinderObject *unwinder, + uintptr_t task_address +) { + PyObject* result = NULL; + PyObject *call_stack = NULL; + PyObject *tn = NULL; + char task_obj[SIZEOF_TASK_OBJ]; + uintptr_t coro_addr; + + // Create call_stack first since it's the first tuple element + call_stack = PyList_New(0); + if (call_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create call stack list"); + goto error; + } + + // Create task name/address for second tuple element + tn = PyLong_FromUnsignedLongLong(task_address); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name/address"); + goto error; + } + + // Parse coroutine chain + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object for coro chain"); + goto error; + } + + coro_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); + + if ((void*)coro_addr != NULL) { + if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain"); + goto error; + } + + if (PyList_Reverse(call_stack)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reverse call stack"); + goto error; + } + } + + // Create final 
CoroInfo result + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create CoroInfo"); + goto error; + } + + // PyStructSequence_SetItem steals references, so we don't need to DECREF on success + PyStructSequence_SetItem(result, 0, call_stack); // This steals the reference + PyStructSequence_SetItem(result, 1, tn); // This steals the reference + + return result; + +error: + Py_XDECREF(result); + Py_XDECREF(call_stack); + Py_XDECREF(tn); + return NULL; +} + +int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to +) { + char is_task; + PyObject* result = NULL; + int err; + + err = read_char( + unwinder, + task_address + (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_is_task, + &is_task); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read is_task flag"); + goto error; + } + + if (is_task) { + result = create_task_result(unwinder, task_address); + if (!result) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task result"); + goto error; + } + } else { + // Create an empty CoroInfo for non-task objects + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->CoroInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty CoroInfo"); + goto error; + } + PyObject *empty_list = PyList_New(0); + if (empty_list == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty list"); + goto error; + } + PyObject *task_name = PyLong_FromUnsignedLongLong(task_address); + if (task_name == NULL) { + Py_DECREF(empty_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name"); + goto error; + } + PyStructSequence_SetItem(result, 0, empty_list); // This steals the reference + PyStructSequence_SetItem(result, 1, task_name); // This steals the reference + } + if (PyList_Append(render_to, result)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); + goto error; + } + + Py_DECREF(result); + return 0; + +error: + Py_XDECREF(result); + return -1; +} + +/* ============================================================================ + * TASK AWAITED_BY PROCESSING + * ============================================================================ */ + +// Forward declaration for mutual recursion +static int process_waiter_task(RemoteUnwinderObject *unwinder, uintptr_t key_addr, void *context); + +// Processor function for parsing tasks in sets +static int +process_task_parser( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *awaited_by = (PyObject *)context; + return parse_task(unwinder, key_addr, awaited_by); +} + +static int +parse_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *awaited_by +) { + return process_task_awaited_by(unwinder, task_address, process_task_parser, awaited_by); +} + +int +process_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + set_entry_processor_func processor, + void *context +) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + 
(size_t)unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); + return -1; + } + + uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); + if ((void*)task_ab_addr == NULL) { + return 0; // No tasks waiting for this one + } + + char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); + + if (awaited_by_is_a_set) { + return iterate_set_entries(unwinder, task_ab_addr, processor, context); + } else { + // Single task waiting + return processor(unwinder, task_ab_addr, context); + } +} + +int +process_single_task_node( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject **task_info, + PyObject *result +) { + PyObject *tn = NULL; + PyObject *current_awaited_by = NULL; + PyObject *task_id = NULL; + PyObject *result_item = NULL; + PyObject *coroutine_stack = NULL; + + tn = parse_task_name(unwinder, task_addr); + if (tn == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); + goto error; + } + + current_awaited_by = PyList_New(0); + if (current_awaited_by == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); + goto error; + } + + // Extract the coroutine stack for this task + coroutine_stack = PyList_New(0); + if (coroutine_stack == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); + goto error; + } + + if (parse_task(unwinder, task_addr, coroutine_stack) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); + goto error; + } + + task_id = PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); + goto error; + } + + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result_item = PyStructSequence_New(state->TaskInfo_Type); + if (result_item == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); + goto error; + } + + PyStructSequence_SetItem(result_item, 0, task_id); // steals ref + PyStructSequence_SetItem(result_item, 1, tn); // steals ref + PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref + PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref + + // References transferred to tuple + task_id = NULL; + tn = NULL; + coroutine_stack = NULL; + current_awaited_by = NULL; + + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); + return -1; + } + if (task_info != NULL) { + *task_info = result_item; + } + Py_DECREF(result_item); + + // Get back current_awaited_by reference for parse_task_awaited_by + current_awaited_by = PyStructSequence_GetItem(result_item, 3); + if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); + // No cleanup needed here since all references were transferred to result_item + // and result_item was already added to result list and decreffed + return -1; + } + + return 0; + 
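+    // Error-path note: every local reference above has either been
+    // transferred into result_item (and set to NULL) or is still owned
+    // here, so the Py_XDECREF calls below are safe in both cases.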
+error: + Py_XDECREF(tn); + Py_XDECREF(current_awaited_by); + Py_XDECREF(task_id); + Py_XDECREF(result_item); + Py_XDECREF(coroutine_stack); + return -1; +} + +int +process_task_and_waiters( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + // First, add this task to the result + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + return -1; + } + + // Now find all tasks that are waiting for this task and process them + return process_task_awaited_by(unwinder, task_addr, process_waiter_task, result); +} + +// Processor function for task waiters +static int +process_waiter_task( + RemoteUnwinderObject *unwinder, + uintptr_t key_addr, + void *context +) { + PyObject *result = (PyObject *)context; + return process_task_and_waiters(unwinder, key_addr, result); +} + +/* ============================================================================ + * RUNNING TASK FUNCTIONS + * ============================================================================ */ + +int +find_running_task_in_thread( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + uintptr_t *running_task_addr +) { + *running_task_addr = (uintptr_t)NULL; + + uintptr_t address_of_running_loop; + int bytes_read = read_py_ptr( + unwinder, + thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, + &address_of_running_loop); + if (bytes_read == -1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); + return -1; + } + + // No asyncio event loop is currently running in this thread + if ((void*)address_of_running_loop == NULL) { + return 0; + } + + int err = read_ptr( + unwinder, + thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, + running_task_addr); + if (err) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); + return -1; + } + + return 0; +} + +int +get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr) { + uintptr_t running_coro_addr = 0; + + if (read_py_ptr( + unwinder, + task_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_coro, + &running_coro_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); + return -1; + } + + if (running_coro_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); + return -1; + } + + // Note: genobject's gi_iframe is an embedded struct, so the address at + // the offset leads directly to its first field: f_executable + if (read_py_ptr( + unwinder, + running_coro_addr + (uintptr_t)unwinder->debug_offsets.gen_object.gi_iframe, code_obj_addr) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); + return -1; + } + + if (*code_obj_addr == 0) { + PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); + return -1; + } + + return 0; +} + +/* ============================================================================ + * ASYNC FRAME CHAIN PARSING + * ============================================================================ */ + +int +parse_async_frame_chain( + RemoteUnwinderObject *unwinder, + PyObject *calls, + uintptr_t address_of_thread, + uintptr_t running_task_code_obj +) { + uintptr_t 
address_of_current_frame; + if (find_running_frame(unwinder, address_of_thread, &address_of_current_frame) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Running frame search failed in async chain"); + return -1; + } + + while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; + uintptr_t address_of_code_object; + int res = parse_frame_object( + unwinder, + &frame_info, + address_of_current_frame, + &address_of_code_object, + &address_of_current_frame + ); + + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Async frame object parsing failed in chain"); + return -1; + } + + if (!frame_info) { + continue; + } + + if (PyList_Append(calls, frame_info) == -1) { + Py_DECREF(frame_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame info to async chain"); + return -1; + } + + Py_DECREF(frame_info); + + if (address_of_code_object == running_task_code_obj) { + break; + } + } + + return 0; +} + +/* ============================================================================ + * AWAITED BY PARSING FUNCTIONS + * ============================================================================ */ + +static int +append_awaited_by_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t head_addr, + PyObject *result +) { + char task_node[SIZEOF_LLIST_NODE]; + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, + sizeof(task_node), task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task node head"); + return -1; + } + + size_t iteration_count = 0; + const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound + + while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { + if (++iteration_count > MAX_ITERATIONS) { + PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Task list iteration limit exceeded"); + return -1; + } + + if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid linked list structure reading remote memory"); + set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); + return -1; + } + + uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) + - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; + + if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); + return -1; + } + + // Read next node + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + sizeof(task_node), + task_node) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); + return -1; + } + } + + return 0; +} + +int +append_awaited_by( + RemoteUnwinderObject *unwinder, + unsigned long tid, + uintptr_t head_addr, + PyObject *result) +{ + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID object"); + return -1; + } + + PyObject* awaited_by_for_thread = PyList_New(0); + if (awaited_by_for_thread == NULL) { + Py_DECREF(tid_py); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); + return -1; + } + + 
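+    // Build the AwaitedInfo struct sequence (tid, awaited_by_list) up front
+    // and append it to the result; the list contents are populated below,
+    // after ownership of both elements has moved into the struct sequence.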
RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *result_item = PyStructSequence_New(state->AwaitedInfo_Type); + if (result_item == NULL) { + Py_DECREF(tid_py); + Py_DECREF(awaited_by_for_thread); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); + return -1; + } + + PyStructSequence_SetItem(result_item, 0, tid_py); // steals ref + PyStructSequence_SetItem(result_item, 1, awaited_by_for_thread); // steals ref + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by result item"); + return -1; + } + Py_DECREF(result_item); + + if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by for thread"); + return -1; + } + + return 0; +} + +/* ============================================================================ + * THREAD PROCESSOR FUNCTIONS FOR ASYNCIO + * ============================================================================ */ + +int +process_thread_for_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + uintptr_t head_addr = thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; + return append_awaited_by(unwinder, tid, head_addr, result); +} + +static int +process_running_task_chain( + RemoteUnwinderObject *unwinder, + uintptr_t running_task_addr, + uintptr_t thread_state_addr, + PyObject *result +) { + uintptr_t running_task_code_obj = 0; + if (get_task_code_object(unwinder, running_task_addr, &running_task_code_obj) < 0) { + return -1; + } + + // First, add this task to the result + PyObject *task_info = NULL; + if (process_single_task_node(unwinder, running_task_addr, &task_info, result) < 0) { + return -1; + } + + // Get the chain from the current frame to this task + PyObject *coro_chain = PyStructSequence_GET_ITEM(task_info, 2); + assert(coro_chain != NULL); + if (PyList_GET_SIZE(coro_chain) != 1) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Coro chain is not a single item"); + return -1; + } + PyObject *coro_info = PyList_GET_ITEM(coro_chain, 0); + assert(coro_info != NULL); + PyObject *frame_chain = PyStructSequence_GET_ITEM(coro_info, 0); + assert(frame_chain != NULL); + + // Clear the stale frame list; it is refilled below from the live frame chain + if (PyList_Clear(frame_chain) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to clear coroutine chain"); + return -1; + } + + // Add the chain from the current frame to this task + if (parse_async_frame_chain(unwinder, frame_chain, thread_state_addr, running_task_code_obj) < 0) { + return -1; + } + + // Now find all tasks that are waiting for this task and process them + if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, result) < 0) { + return -1; + } + + return 0; +} + +int +process_thread_for_async_stack_trace( + RemoteUnwinderObject *unwinder, + uintptr_t thread_state_addr, + unsigned long tid, + void *context +) { + PyObject *result = (PyObject *)context; + + // Find the running task in this thread; if its asyncio state cannot be + // read, skip the thread rather than failing the whole sample + uintptr_t running_task_addr; + if (find_running_task_in_thread(unwinder, thread_state_addr, &running_task_addr) < 0) { + return 0; + } + + // If we found a running task, process it and its waiters + if ((void*)running_task_addr != NULL) { + PyObject *task_list = PyList_New(0); + if (task_list 
== NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task list for thread"); + return -1; + } + + if (process_running_task_chain(unwinder, running_task_addr, thread_state_addr, task_list) < 0) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process running task chain"); + return -1; + } + + // Create AwaitedInfo structure for this thread + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + return -1; + } + + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *awaited_info = PyStructSequence_New(state->AwaitedInfo_Type); + if (awaited_info == NULL) { + Py_DECREF(tid_py); + Py_DECREF(task_list); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); + return -1; + } + + PyStructSequence_SetItem(awaited_info, 0, tid_py); // steals ref + PyStructSequence_SetItem(awaited_info, 1, task_list); // steals ref + + if (PyList_Append(result, awaited_info)) { + Py_DECREF(awaited_info); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append AwaitedInfo to result"); + return -1; + } + Py_DECREF(awaited_info); + } + + return 0; +} diff --git a/Modules/_remote_debugging/clinic/module.c.h b/Modules/_remote_debugging/clinic/module.c.h new file mode 100644 index 00000000000..03127b753cc --- /dev/null +++ b/Modules/_remote_debugging/clinic/module.c.h @@ -0,0 +1,422 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, +"RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" +" mode=0, debug=False, skip_non_matching_threads=True,\n" +" native=False, gc=False, cache_frames=False, stats=False)\n" +"--\n" +"\n" +"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" +"\n" +"Args:\n" +" pid: Process ID of the target Python process to debug\n" +" all_threads: If True, initialize state for all threads in the process.\n" +" If False, only initialize for the main thread.\n" +" only_active_thread: If True, only sample the thread holding the GIL.\n" +" mode: Profiling mode: 0=WALL (wall-time), 1=CPU (cpu-time), 2=GIL (gil-time).\n" +" Cannot be used together with all_threads=True.\n" +" debug: If True, chain exceptions to explain the sequence of events that\n" +" led to the exception.\n" +" skip_non_matching_threads: If True, skip threads that don\'t match the selected mode.\n" +" If False, include all threads regardless of mode.\n" +" native: If True, include artificial \"<native>\" frames to denote calls to\n" +" non-Python code.\n" +" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n" +" collection.\n" +" cache_frames: If True, enable frame caching optimization to avoid re-reading\n" +" unchanged parent frames between samples.\n" +" stats: If True, collect statistics about cache hits, memory reads, etc.\n" +" Use get_stats() to retrieve the collected statistics.\n" +"\n" +"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" +"process, including examining thread states, stack frames and other runtime 
data.\n" +"\n" +"Raises:\n" +" PermissionError: If access to the target process is denied\n" +" OSError: If unable to attach to the target process or access its memory\n" +" RuntimeError: If unable to read debug information from the target process\n" +" ValueError: If both all_threads and only_active_thread are True"); + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads, + int only_active_thread, + int mode, int debug, + int skip_non_matching_threads, + int native, int gc, + int cache_frames, int stats); + +static int +_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 10 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), &_Py_ID(cache_frames), &_Py_ID(stats), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", "cache_frames", "stats", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "RemoteUnwinder", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[10]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 1; + int pid; + int all_threads = 0; + int only_active_thread = 0; + int mode = 0; + int debug = 0; + int skip_non_matching_threads = 1; + int native = 0; + int gc = 0; + int cache_frames = 0; + int stats = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + pid = PyLong_AsInt(fastargs[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + if (fastargs[1]) { + all_threads = PyObject_IsTrue(fastargs[1]); + if (all_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[2]) { + only_active_thread = PyObject_IsTrue(fastargs[2]); + if (only_active_thread < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[3]) { + mode = PyLong_AsInt(fastargs[3]); + if (mode == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[4]) { + debug = PyObject_IsTrue(fastargs[4]); + if (debug < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[5]) { + skip_non_matching_threads = PyObject_IsTrue(fastargs[5]); + if (skip_non_matching_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[6]) { + native = PyObject_IsTrue(fastargs[6]); + if (native < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[7]) { + gc = PyObject_IsTrue(fastargs[7]); + if (gc < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[8]) { + cache_frames = PyObject_IsTrue(fastargs[8]); + if (cache_frames < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + stats = PyObject_IsTrue(fastargs[9]); + if (stats < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc, cache_frames, stats); + +exit: + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stack_trace__doc__, +"get_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns stack traces for all interpreters and threads in process.\n" +"\n" +"Each element in the returned list is a tuple of (interpreter_id, thread_list), where:\n" +"- interpreter_id is the interpreter identifier\n" +"- thread_list is a list of tuples (thread_id, frame_list) for threads in that interpreter\n" +" - thread_id is the OS thread identifier\n" +" - frame_list is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for that thread, ordered from most recent to oldest\n" +"\n" +"The threads returned depend on the initialization parameters:\n" +"- If only_active_thread was True: returns only the thread holding the GIL across all interpreters\n" +"- If all_threads was True: returns all threads across all interpreters\n" +"- Otherwise: returns only the main thread of each interpreter\n" +"\n" +"Example:\n" +" [\n" +" (0, [ # Main interpreter\n" +" (1234, [\n" +" (\'process_data\', \'worker.py\', 127),\n" +" (\'run_worker\', \'worker.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ]),\n" +" (1235, [\n" +" (\'handle_request\', \'server.py\', 89),\n" +" (\'serve_forever\', \'server.py\', 52)\n" 
+" ])\n" +" ]),\n" +" (1, [ # Sub-interpreter\n" +" (1236, [\n" +" (\'sub_worker\', \'sub.py\', 15)\n" +" ])\n" +" ])\n" +" ]\n" +"\n" +"Raises:\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF \ + {"get_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__, +"get_all_awaited_by($self, /)\n" +"--\n" +"\n" +"Get all tasks and their awaited_by relationships from the remote process.\n" +"\n" +"This provides a tree structure showing which tasks are waiting for other tasks.\n" +"\n" +"For each task, returns:\n" +"1. The call stack frames leading to where the task is currently executing\n" +"2. The name of the task\n" +"3. A list of tasks that this task is waiting for, with their own frames/names/etc\n" +"\n" +"Returns a list of [frames, task_name, subtasks] where:\n" +"- frames: List of (func_name, filename, lineno) showing the call stack\n" +"- task_name: String identifier for the task\n" +"- subtasks: List of tasks being awaited by this task, in same format\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the remote process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output:\n" +"[\n" +" [\n" +" [(\"c5\", \"script.py\", 10), (\"c4\", \"script.py\", 14)],\n" +" \"c2_root\",\n" +" [\n" +" [\n" +" [(\"c1\", \"script.py\", 23)],\n" +" \"sub_main_2\",\n" +" [...]\n" +" ],\n" +" [...]\n" +" ]\n" +" ]\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF \ + {"get_all_awaited_by", (PyCFunction)_remote_debugging_RemoteUnwinder_get_all_awaited_by, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__, +"get_async_stack_trace($self, /)\n" +"--\n" +"\n" +"Get the currently running async tasks and their dependency graphs from the remote process.\n" +"\n" +"This returns information about running tasks and all tasks that are waiting for them,\n" +"forming a complete dependency graph for each thread\'s active task.\n" +"\n" +"For each thread with a running task, returns the running task plus 
all tasks that\n" +"transitively depend on it (tasks waiting for the running task, tasks waiting for\n" +"those tasks, etc.).\n" +"\n" +"Returns a list of per-thread results, where each thread result contains:\n" +"- Thread ID\n" +"- List of task information for the running task and all its waiters\n" +"\n" +"Each task info contains:\n" +"- Task ID (memory address)\n" +"- Task name\n" +"- Call stack frames: List of (func_name, filename, lineno)\n" +"- List of tasks waiting for this task (recursive structure)\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the target process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output (similar structure to get_all_awaited_by but only for running tasks):\n" +"[\n" +" (140234, [\n" +" (4345585712, \'main_task\',\n" +" [(\"run_server\", \"server.py\", 127), (\"main\", \"app.py\", 23)],\n" +" [\n" +" (4345585800, \'worker_1\', [...], [...]),\n" +" (4345585900, \'worker_2\', [...], [...])\n" +" ])\n" +" ])\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF \ + {"get_async_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_async_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stats__doc__, +"get_stats($self, /)\n" +"--\n" +"\n" +"Get collected statistics about profiling performance.\n" +"\n" +"Returns a dictionary containing statistics about cache performance,\n" +"memory reads, and other profiling metrics. 
Only available if the\n" +"RemoteUnwinder was created with stats=True.\n" +"\n" +"Returns:\n" +" dict: A dictionary containing:\n" +" - total_samples: Total number of get_stack_trace calls\n" +" - frame_cache_hits: Full cache hits (entire stack unchanged)\n" +" - frame_cache_misses: Cache misses requiring full walk\n" +" - frame_cache_partial_hits: Partial hits (stopped at cached frame)\n" +" - frames_read_from_cache: Total frames retrieved from cache\n" +" - frames_read_from_memory: Total frames read from remote memory\n" +" - memory_reads: Total remote memory read operations\n" +" - memory_bytes_read: Total bytes read from remote memory\n" +" - code_object_cache_hits: Code object cache hits\n" +" - code_object_cache_misses: Code object cache misses\n" +" - stale_cache_invalidations: Times stale cache entries were cleared\n" +" - frame_cache_hit_rate: Percentage of samples that hit the cache\n" +" - code_object_cache_hit_rate: Percentage of code object lookups that hit cache\n" +"\n" +"Raises:\n" +" RuntimeError: If stats collection was not enabled (stats=False)"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STATS_METHODDEF \ + {"get_stats", (PyCFunction)_remote_debugging_RemoteUnwinder_get_stats, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_stats__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stats_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stats(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_stats_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} +/*[clinic end generated code: output=f1fd6c1d4c4c7254 input=a9049054013a1b77]*/ diff --git a/Modules/_remote_debugging/code_objects.c b/Modules/_remote_debugging/code_objects.c new file mode 100644 index 00000000000..2cd2505d0f9 --- /dev/null +++ b/Modules/_remote_debugging/code_objects.c @@ -0,0 +1,395 @@ +/****************************************************************************** + * Remote Debugging Module - Code Object Functions + * + * This file contains functions for parsing code objects and line tables + * from remote process memory. 
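+ *
+ * Line tables use the varint scheme implemented by scan_varint() below:
+ * 6 value bits per byte, least-significant group first, with 0x40 as the
+ * continuation flag.  Worked example: 200 is encoded as 0x48 0x03, since
+ * (0x48 & 63) | ((0x03 & 63) << 6) == 200.  Signed varints keep the sign
+ * in bit 0, so the raw value 7 decodes to -3 and 6 decodes to +3.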
+ ******************************************************************************/ + +#include "_remote_debugging.h" + +/* ============================================================================ + * TLBC CACHING FUNCTIONS (Py_GIL_DISABLED only) + * ============================================================================ */ + +#ifdef Py_GIL_DISABLED + +void +tlbc_cache_entry_destroy(void *ptr) +{ + TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr; + if (entry->tlbc_array) { + PyMem_RawFree(entry->tlbc_array); + } + PyMem_RawFree(entry); +} + +TLBCCacheEntry * +get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation) +{ + void *key = (void *)code_addr; + TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key); + + if (entry && entry->generation != current_generation) { + // Entry is stale, remove it by setting to NULL + _Py_hashtable_set(self->tlbc_cache, key, NULL); + entry = NULL; + } + + return entry; +} + +int +cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) +{ + uintptr_t tlbc_array_ptr; + void *tlbc_array = NULL; + TLBCCacheEntry *entry = NULL; + + // Read the TLBC array pointer + if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer"); + return 0; // Read error + } + + // Validate TLBC array pointer + if (tlbc_array_ptr == 0) { + PyErr_SetString(PyExc_RuntimeError, "TLBC array pointer is NULL"); + return 0; // No TLBC array + } + + // Read the TLBC array size + Py_ssize_t tlbc_size; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size"); + return 0; // Read error + } + + // Validate TLBC array size + if (tlbc_size <= 0) { + PyErr_SetString(PyExc_RuntimeError, "Invalid TLBC array size"); + return 0; // Invalid size + } + + if (tlbc_size > MAX_TLBC_SIZE) { + PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit"); + return 0; // Invalid size + } + + // Allocate and read the entire TLBC array + size_t array_data_size = tlbc_size * sizeof(void*); + tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); + if (!tlbc_array) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array"); + return 0; // Memory error + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { + PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data"); + return 0; // Read error + } + + // Create cache entry + entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); + if (!entry) { + PyErr_NoMemory(); + PyMem_RawFree(tlbc_array); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry"); + return 0; // Memory error + } + + entry->tlbc_array = tlbc_array; + entry->tlbc_array_size = tlbc_size; + entry->generation = generation; + + // Store in cache + void *key = (void *)code_addr; + if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) { + tlbc_cache_entry_destroy(entry); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in 
cache"); + return 0; // Cache error + } + + return 1; // Success +} + +#endif + +/* ============================================================================ + * LINE TABLE PARSING FUNCTIONS + * ============================================================================ */ + +static int +scan_varint(const uint8_t **ptr) +{ + unsigned int read = **ptr; + *ptr = *ptr + 1; + unsigned int val = read & 63; + unsigned int shift = 0; + while (read & 64) { + read = **ptr; + *ptr = *ptr + 1; + shift += 6; + val |= (read & 63) << shift; + } + return val; +} + +static int +scan_signed_varint(const uint8_t **ptr) +{ + unsigned int uval = scan_varint(ptr); + if (uval & 1) { + return -(int)(uval >> 1); + } + else { + return uval >> 1; + } +} + +bool +parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) +{ + const uint8_t* ptr = (const uint8_t*)(linetable); + uintptr_t addr = 0; + info->lineno = firstlineno; + + while (*ptr != '\0') { + // See InternalDocs/code_objects.md for where these magic numbers are from + // and for the decoding algorithm. + uint8_t first_byte = *(ptr++); + uint8_t code = (first_byte >> 3) & 15; + size_t length = (first_byte & 7) + 1; + uintptr_t end_addr = addr + length; + switch (code) { + case PY_CODE_LOCATION_INFO_NONE: { + break; + } + case PY_CODE_LOCATION_INFO_LONG: { + int line_delta = scan_signed_varint(&ptr); + info->lineno += line_delta; + info->end_lineno = info->lineno + scan_varint(&ptr); + info->column = scan_varint(&ptr) - 1; + info->end_column = scan_varint(&ptr) - 1; + break; + } + case PY_CODE_LOCATION_INFO_NO_COLUMNS: { + int line_delta = scan_signed_varint(&ptr); + info->lineno += line_delta; + info->column = info->end_column = -1; + break; + } + case PY_CODE_LOCATION_INFO_ONE_LINE0: + case PY_CODE_LOCATION_INFO_ONE_LINE1: + case PY_CODE_LOCATION_INFO_ONE_LINE2: { + int line_delta = code - 10; + info->lineno += line_delta; + info->end_lineno = info->lineno; + info->column = *(ptr++); + info->end_column = *(ptr++); + break; + } + default: { + uint8_t second_byte = *(ptr++); + if ((second_byte & 128) != 0) { + return false; + } + info->column = code << 3 | (second_byte >> 4); + info->end_column = info->column + (second_byte & 15); + break; + } + } + if (addr <= addrq && end_addr > addrq) { + return true; + } + addr = end_addr; + } + return false; +} + +/* ============================================================================ + * CODE OBJECT AND FRAME INFO FUNCTIONS + * ============================================================================ */ + +PyObject * +make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, + PyObject *func) +{ + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *info = PyStructSequence_New(state->FrameInfo_Type); + if (info == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo"); + return NULL; + } + Py_INCREF(file); + Py_INCREF(line); + Py_INCREF(func); + PyStructSequence_SetItem(info, 0, file); + PyStructSequence_SetItem(info, 1, line); + PyStructSequence_SetItem(info, 2, func); + return info; +} + +int +parse_code_object(RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t instruction_pointer, + uintptr_t *previous_frame, + int32_t tlbc_index) +{ + void *key = (void *)address; + CachedCodeMetadata *meta = NULL; + PyObject *func = NULL; + PyObject *file = NULL; + PyObject *linetable = NULL; + +#ifdef Py_GIL_DISABLED + // In free threading 
builds, code object addresses might have the low bit set + // as a flag, so we need to mask it off to get the real address + uintptr_t real_address = address & (~1); +#else + uintptr_t real_address = address; +#endif + + if (unwinder && unwinder->code_object_cache != NULL) { + meta = _Py_hashtable_get(unwinder->code_object_cache, key); + if (meta) { + STATS_INC(unwinder, code_object_cache_hits); + } else { + STATS_INC(unwinder, code_object_cache_misses); + } + } + + if (meta == NULL) { + char code_object[SIZEOF_CODE_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object"); + goto error; + } + + func = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); + if (!func) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object"); + goto error; + } + + file = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); + if (!file) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object"); + goto error; + } + + linetable = read_py_bytes(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); + if (!linetable) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object"); + goto error; + } + + meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); + if (!meta) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata"); + goto error; + } + + meta->func_name = func; + meta->file_name = file; + meta->linetable = linetable; + meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno); + meta->addr_code_adaptive = real_address + (uintptr_t)unwinder->debug_offsets.code_object.co_code_adaptive; + + if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { + cached_code_metadata_destroy(meta); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata"); + goto error; + } + + // Ownership transferred to meta + func = NULL; + file = NULL; + linetable = NULL; + } + + uintptr_t ip = instruction_pointer; + ptrdiff_t addrq; + +#ifdef Py_GIL_DISABLED + // Handle thread-local bytecode (TLBC) in free threading builds; the + // NULL check must come first so unwinder is never dereferenced when NULL + if (unwinder == NULL || tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0) { + // No unwinder or no TLBC - use the main bytecode directly + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + goto done_tlbc; + } + + // Try to get TLBC data from the cache, keyed by the current generation + TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + + if (!tlbc_entry) { + // Cache miss - try to read and cache TLBC array + if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array"); + goto error; + } + tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + } + + if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { + // Use cached TLBC data + uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); + uintptr_t 
tlbc_bytecode_addr = entries[tlbc_index]; + + if (tlbc_bytecode_addr != 0) { + // Calculate offset from TLBC bytecode + addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr; + goto done_tlbc; + } + } + + // Fall back to main bytecode + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + +done_tlbc: +#else + // Non-free-threaded build, always use the main bytecode + (void)tlbc_index; // Suppress unused parameter warning + (void)unwinder; // Suppress unused parameter warning + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; +#endif + ; // Empty statement to avoid C23 extension warning + LocationInfo info = {0}; + bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + meta->first_lineno, &info); + if (!ok) { + info.lineno = -1; + } + + PyObject *lineno = PyLong_FromLong(info.lineno); + if (!lineno) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); + goto error; + } + + PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); + Py_DECREF(lineno); + if (!tuple) { + goto error; + } + + *result = tuple; + return 0; + +error: + Py_XDECREF(func); + Py_XDECREF(file); + Py_XDECREF(linetable); + return -1; +} diff --git a/Modules/_remote_debugging/frame_cache.c b/Modules/_remote_debugging/frame_cache.c new file mode 100644 index 00000000000..4598b9dc353 --- /dev/null +++ b/Modules/_remote_debugging/frame_cache.c @@ -0,0 +1,236 @@ +/****************************************************************************** + * Remote Debugging Module - Frame Cache + * + * This file contains functions for caching frame information to optimize + * repeated stack unwinding for profiling. + ******************************************************************************/ + +#include "_remote_debugging.h" + +/* ============================================================================ + * FRAME CACHE - stores (address, frame_info) pairs per thread + * Uses preallocated fixed-size arrays for efficiency and bounded memory. 
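+ * Lookups scan the FRAME_CACHE_MAX_THREADS slots linearly; for a small,
+ * fixed slot count this is adequate and avoids a hash table.  When all
+ * slots are taken, stores are skipped and sampling degrades gracefully
+ * to uncached stack walks instead of failing.
+ *
+ * Illustrative partial-hit example: if a thread's stack was [A B C D] on
+ * the previous sample and is [X Y C D] now, the walker reads X and Y,
+ * stops at C (the recorded last_profiled_frame), and the cached [C D]
+ * tail is spliced back on by frame_cache_lookup_and_extend().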
+ * ============================================================================ */ + +int +frame_cache_init(RemoteUnwinderObject *unwinder) +{ + unwinder->frame_cache = PyMem_Calloc(FRAME_CACHE_MAX_THREADS, sizeof(FrameCacheEntry)); + if (!unwinder->frame_cache) { + PyErr_NoMemory(); + return -1; + } + return 0; +} + +void +frame_cache_cleanup(RemoteUnwinderObject *unwinder) +{ + if (!unwinder->frame_cache) { + return; + } + for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) { + Py_CLEAR(unwinder->frame_cache[i].frame_list); + } + PyMem_Free(unwinder->frame_cache); + unwinder->frame_cache = NULL; +} + +// Find cache entry by thread_id +FrameCacheEntry * +frame_cache_find(RemoteUnwinderObject *unwinder, uint64_t thread_id) +{ + if (!unwinder->frame_cache || thread_id == 0) { + return NULL; + } + for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) { + if (unwinder->frame_cache[i].thread_id == thread_id) { + return &unwinder->frame_cache[i]; + } + } + return NULL; +} + +// Allocate a cache slot for a thread +// Returns NULL if cache is full (graceful degradation) +static FrameCacheEntry * +frame_cache_alloc_slot(RemoteUnwinderObject *unwinder, uint64_t thread_id) +{ + if (!unwinder->frame_cache || thread_id == 0) { + return NULL; + } + // First check if thread already has an entry + for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) { + if (unwinder->frame_cache[i].thread_id == thread_id) { + return &unwinder->frame_cache[i]; + } + } + // Find empty slot + for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) { + if (unwinder->frame_cache[i].thread_id == 0) { + return &unwinder->frame_cache[i]; + } + } + // Cache full - graceful degradation + return NULL; +} + +// Remove cache entries for threads not seen in the result +// result structure: list of InterpreterInfo, where InterpreterInfo[1] is threads list, +// and ThreadInfo[0] is the thread_id +void +frame_cache_invalidate_stale(RemoteUnwinderObject *unwinder, PyObject *result) +{ + if (!unwinder->frame_cache || !result || !PyList_Check(result)) { + return; + } + + // Build array of seen thread IDs from result + uint64_t seen_threads[FRAME_CACHE_MAX_THREADS]; + int num_seen = 0; + + Py_ssize_t num_interps = PyList_GET_SIZE(result); + for (Py_ssize_t i = 0; i < num_interps && num_seen < FRAME_CACHE_MAX_THREADS; i++) { + PyObject *interp_info = PyList_GET_ITEM(result, i); + PyObject *threads = PyStructSequence_GetItem(interp_info, 1); + if (!threads || !PyList_Check(threads)) { + continue; + } + Py_ssize_t num_threads = PyList_GET_SIZE(threads); + for (Py_ssize_t j = 0; j < num_threads && num_seen < FRAME_CACHE_MAX_THREADS; j++) { + PyObject *thread_info = PyList_GET_ITEM(threads, j); + PyObject *tid_obj = PyStructSequence_GetItem(thread_info, 0); + if (tid_obj) { + uint64_t tid = PyLong_AsUnsignedLongLong(tid_obj); + if (!PyErr_Occurred()) { + seen_threads[num_seen++] = tid; + } else { + PyErr_Clear(); + } + } + } + } + + // Invalidate entries not in seen list + for (int i = 0; i < FRAME_CACHE_MAX_THREADS; i++) { + if (unwinder->frame_cache[i].thread_id == 0) { + continue; + } + int found = 0; + for (int j = 0; j < num_seen; j++) { + if (unwinder->frame_cache[i].thread_id == seen_threads[j]) { + found = 1; + break; + } + } + if (!found) { + // Clear this entry + Py_CLEAR(unwinder->frame_cache[i].frame_list); + unwinder->frame_cache[i].thread_id = 0; + unwinder->frame_cache[i].num_addrs = 0; + STATS_INC(unwinder, stale_cache_invalidations); + } + } +} + +// Find last_profiled_frame in cache and extend frame_info with cached continuation +// If 
frame_addrs is provided (not NULL), also extends it with cached addresses +int +frame_cache_lookup_and_extend( + RemoteUnwinderObject *unwinder, + uint64_t thread_id, + uintptr_t last_profiled_frame, + PyObject *frame_info, + uintptr_t *frame_addrs, + Py_ssize_t *num_addrs, + Py_ssize_t max_addrs) +{ + if (!unwinder->frame_cache || last_profiled_frame == 0) { + return 0; + } + + FrameCacheEntry *entry = frame_cache_find(unwinder, thread_id); + if (!entry || !entry->frame_list) { + return 0; + } + + // Find the index where last_profiled_frame matches + Py_ssize_t start_idx = -1; + for (Py_ssize_t i = 0; i < entry->num_addrs; i++) { + if (entry->addrs[i] == last_profiled_frame) { + start_idx = i; + break; + } + } + + if (start_idx < 0) { + return 0; // Not found + } + + Py_ssize_t num_frames = PyList_GET_SIZE(entry->frame_list); + + // Extend frame_info with frames from start_idx onwards + PyObject *slice = PyList_GetSlice(entry->frame_list, start_idx, num_frames); + if (!slice) { + return -1; + } + + Py_ssize_t cur_size = PyList_GET_SIZE(frame_info); + int result = PyList_SetSlice(frame_info, cur_size, cur_size, slice); + Py_DECREF(slice); + + if (result < 0) { + return -1; + } + + // Also extend frame_addrs with cached addresses if provided + if (frame_addrs) { + for (Py_ssize_t i = start_idx; i < entry->num_addrs && *num_addrs < max_addrs; i++) { + frame_addrs[(*num_addrs)++] = entry->addrs[i]; + } + } + + return 1; +} + +// Store frame list with addresses in cache +// Returns: 1 = stored successfully, 0 = not stored (graceful degradation), -1 = error +int +frame_cache_store( + RemoteUnwinderObject *unwinder, + uint64_t thread_id, + PyObject *frame_list, + const uintptr_t *addrs, + Py_ssize_t num_addrs) +{ + if (!unwinder->frame_cache || thread_id == 0) { + return 0; + } + + // Clamp to max frames + if (num_addrs > FRAME_CACHE_MAX_FRAMES) { + num_addrs = FRAME_CACHE_MAX_FRAMES; + } + + FrameCacheEntry *entry = frame_cache_alloc_slot(unwinder, thread_id); + if (!entry) { + // Cache full - graceful degradation + return 0; + } + + // Clear old frame_list if replacing + Py_CLEAR(entry->frame_list); + + // Store full frame list (don't truncate to num_addrs - frames beyond the + // address array limit are still valid and needed for full cache hits) + Py_ssize_t num_frames = PyList_GET_SIZE(frame_list); + entry->frame_list = PyList_GetSlice(frame_list, 0, num_frames); + if (!entry->frame_list) { + return -1; + } + entry->thread_id = thread_id; + memcpy(entry->addrs, addrs, num_addrs * sizeof(uintptr_t)); + entry->num_addrs = num_addrs; + + return 1; +} diff --git a/Modules/_remote_debugging/frames.c b/Modules/_remote_debugging/frames.c new file mode 100644 index 00000000000..eaf3287c6fe --- /dev/null +++ b/Modules/_remote_debugging/frames.c @@ -0,0 +1,598 @@ +/****************************************************************************** + * Remote Debugging Module - Frame Functions + * + * This file contains functions for parsing interpreter frames and + * managing stack chunks from remote process memory. 
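+ *
+ * Stack chunks are copied into local buffers in bulk so that most frames
+ * can be resolved with local pointer arithmetic (find_frame_in_chunks)
+ * instead of one remote read per frame; frames that live outside any
+ * chunk fall back to parse_frame_object() and a direct remote read.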
+ ******************************************************************************/ + +#include "_remote_debugging.h" + +/* ============================================================================ + * STACK CHUNK MANAGEMENT FUNCTIONS + * ============================================================================ */ + +void +cleanup_stack_chunks(StackChunkList *chunks) +{ + for (size_t i = 0; i < chunks->count; ++i) { + PyMem_RawFree(chunks->chunks[i].local_copy); + } + PyMem_RawFree(chunks->chunks); +} + +static int +process_single_stack_chunk( + RemoteUnwinderObject *unwinder, + uintptr_t chunk_addr, + StackChunkInfo *chunk_info +) { + // Start with default size assumption + size_t current_size = _PY_DATA_STACK_CHUNK_SIZE; + + char *this_chunk = PyMem_RawMalloc(current_size); + if (!this_chunk) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunk buffer"); + return -1; + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, current_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read stack chunk"); + return -1; + } + + // Check actual size and reread if necessary + size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); + if (actual_size != current_size) { + char *resized = PyMem_RawRealloc(this_chunk, actual_size); + if (!resized) { + // On realloc failure the original buffer is still live; free it + PyMem_RawFree(this_chunk); + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to reallocate stack chunk buffer"); + return -1; + } + this_chunk = resized; + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, actual_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reread stack chunk with correct size"); + return -1; + } + current_size = actual_size; + } + + chunk_info->remote_addr = chunk_addr; + chunk_info->size = current_size; + chunk_info->local_copy = this_chunk; + return 0; +} + +int +copy_stack_chunks(RemoteUnwinderObject *unwinder, + uintptr_t tstate_addr, + StackChunkList *out_chunks) +{ + uintptr_t chunk_addr; + StackChunkInfo *chunks = NULL; + size_t count = 0; + size_t max_chunks = 16; + + if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address"); + return -1; + } + + chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); + if (!chunks) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array"); + return -1; + } + + while (chunk_addr != 0) { + // Grow array if needed + if (count >= max_chunks) { + max_chunks *= 2; + StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); + if (!new_chunks) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array"); + goto error; + } + chunks = new_chunks; + } + + // Process this chunk + if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk"); + goto error; + } + + // Get next chunk address and increment count + chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous)); + count++; + } + + out_chunks->chunks = chunks; + out_chunks->count = count; + return 0; + +error: + for (size_t i = 0; i < count; ++i) { + 
PyMem_RawFree(chunks[i].local_copy); + } + PyMem_RawFree(chunks); + return -1; +} + +void * +find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) +{ + for (size_t i = 0; i < chunks->count; ++i) { + uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); + size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); + + if (remote_ptr >= base && remote_ptr < base + payload) { + return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr); + } + } + return NULL; +} + +/* ============================================================================ + * FRAME PARSING FUNCTIONS + * ============================================================================ */ + +int +is_frame_valid( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + uintptr_t code_object_addr +) { + if ((void*)code_object_addr == NULL) { + return 0; + } + + void* frame = (void*)frame_addr; + + char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner); + if (owner == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame or sentinel base frame + } + + if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); + set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); + return -1; + } + return 1; +} + +int +parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* address_of_code_object, + uintptr_t* previous_frame +) { + char frame[SIZEOF_INTERP_FRAME]; + *address_of_code_object = 0; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame"); + return -1; + } + STATS_INC(unwinder, memory_reads); + STATS_ADD(unwinder, memory_bytes_read, SIZEOF_INTERP_FRAME); + + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); + if (frame_valid != 1) { + return frame_valid; + } + + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); + } +#endif + + *address_of_code_object = code_object; + return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); +} + +int +parse_frame_from_chunks( + RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t *previous_frame, + uintptr_t *stackpointer, + StackChunkList *chunks +) { + void *frame_ptr = find_frame_in_chunks(chunks, address); + if (!frame_ptr) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Frame not found in stack chunks"); + return -1; + } + + char *frame = (char *)frame_ptr; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + *stackpointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.stackpointer); + 
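+    // The executable field may carry pointer tag bits (notably on
+    // free-threaded builds), so read it with GET_MEMBER_NO_TAG to recover
+    // the untagged code object address.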
uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable);
+    int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
+    if (frame_valid != 1) {
+        return frame_valid;
+    }
+
+    uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
+
+    // Get tlbc_index for free threading builds
+    int32_t tlbc_index = 0;
+#ifdef Py_GIL_DISABLED
+    if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) {
+        tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index);
+    }
+#endif
+
+    return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index);
+}
+
+/* ============================================================================
+ * FRAME CHAIN PROCESSING
+ * ============================================================================ */
+
+int
+process_frame_chain(
+    RemoteUnwinderObject *unwinder,
+    uintptr_t initial_frame_addr,
+    StackChunkList *chunks,
+    PyObject *frame_info,
+    uintptr_t base_frame_addr,
+    uintptr_t gc_frame,
+    uintptr_t last_profiled_frame,
+    int *stopped_at_cached_frame,
+    uintptr_t *frame_addrs,   // optional: C array to receive frame addresses
+    Py_ssize_t *num_addrs,    // in/out: current count / updated count
+    Py_ssize_t max_addrs)     // max capacity of frame_addrs array
+{
+    uintptr_t frame_addr = initial_frame_addr;
+    uintptr_t prev_frame_addr = 0;
+    uintptr_t last_frame_addr = 0;  // Track last frame visited for validation
+    const size_t MAX_FRAMES = 1024 + 512;
+    size_t frame_count = 0;
+
+    // Initialize output flag
+    if (stopped_at_cached_frame) {
+        *stopped_at_cached_frame = 0;
+    }
+
+    // Quick check: if current_frame == last_profiled_frame, entire stack is unchanged
+    if (last_profiled_frame != 0 && initial_frame_addr == last_profiled_frame) {
+        if (stopped_at_cached_frame) {
+            *stopped_at_cached_frame = 1;
+        }
+        return 0;
+    }
+
+    while ((void*)frame_addr != NULL) {
+        // Check if we've reached the cached frame - if so, stop here
+        if (last_profiled_frame != 0 && frame_addr == last_profiled_frame) {
+            if (stopped_at_cached_frame) {
+                *stopped_at_cached_frame = 1;
+            }
+            break;
+        }
+        PyObject *frame = NULL;
+        uintptr_t next_frame_addr = 0;
+        uintptr_t stackpointer = 0;
+        last_frame_addr = frame_addr;  // Remember this frame address
+
+        if (++frame_count > MAX_FRAMES) {
+            PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
+            set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded");
+            return -1;
+        }
+
+        if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) {
+            PyErr_Clear();
+            uintptr_t address_of_code_object = 0;
+            if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object, &next_frame_addr) < 0) {
+                set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain");
+                return -1;
+            }
+        }
+        if (frame == NULL && PyList_GET_SIZE(frame_info) == 0) {
+            const char *e = "Failed to parse initial frame in chain";
+            PyErr_SetString(PyExc_RuntimeError, e);
+            return -1;
+        }
+        PyObject *extra_frame = NULL;
+        // This frame kicked off the current GC collection:
+        if (unwinder->gc && frame_addr == gc_frame) {
+            _Py_DECLARE_STR(gc, "<GC>");
+            extra_frame = &_Py_STR(gc);
+        }
+        // Otherwise, check for native frames to insert:
+        else if (unwinder->native &&
+                 // We've reached an interpreter trampoline frame:
+                 frame == NULL &&
+                 // 
Bottommost frame is always native, so skip that one:
+                 next_frame_addr &&
+                 // Only suppress native frames if GC tracking is enabled and the next frame will be a GC frame:
+                 !(unwinder->gc && next_frame_addr == gc_frame))
+        {
+            _Py_DECLARE_STR(native, "<native>");
+            extra_frame = &_Py_STR(native);
+        }
+        if (extra_frame) {
+            PyObject *extra_frame_info = make_frame_info(
+                unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame);
+            if (extra_frame_info == NULL) {
+                return -1;
+            }
+            if (PyList_Append(frame_info, extra_frame_info) < 0) {
+                Py_DECREF(extra_frame_info);
+                set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append extra frame");
+                return -1;
+            }
+            // Extra frames use 0 as address (they're synthetic)
+            if (frame_addrs && *num_addrs < max_addrs) {
+                frame_addrs[(*num_addrs)++] = 0;
+            }
+            Py_DECREF(extra_frame_info);
+        }
+        if (frame) {
+            if (prev_frame_addr && frame_addr != prev_frame_addr) {
+                const char *f = "Broken frame chain: expected frame at 0x%lx, got 0x%lx";
+                PyErr_Format(PyExc_RuntimeError, f, prev_frame_addr, frame_addr);
+                Py_DECREF(frame);
+                set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed");
+                return -1;
+            }
+
+            if (PyList_Append(frame_info, frame) < 0) {
+                Py_DECREF(frame);
+                set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame");
+                return -1;
+            }
+            // Track the address for this frame
+            if (frame_addrs && *num_addrs < max_addrs) {
+                frame_addrs[(*num_addrs)++] = frame_addr;
+            }
+            Py_DECREF(frame);
+        }
+
+        prev_frame_addr = next_frame_addr;
+        frame_addr = next_frame_addr;
+    }
+
+    // Validate we reached the base frame (sentinel at bottom of stack)
+    // Only validate if we walked the full chain (didn't stop at cached frame)
+    // and base_frame_addr is provided (non-zero)
+    int stopped_early = stopped_at_cached_frame && *stopped_at_cached_frame;
+    if (!stopped_early && base_frame_addr != 0 && last_frame_addr != base_frame_addr) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)",
+                     base_frame_addr, last_frame_addr);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Clear last_profiled_frame for all threads in the target process.
+// This must be called at the start of profiling to avoid stale values
+// from previous profilers causing us to stop frame walking early.
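+// Unlike the sampling paths, which only read remote memory, this helper
+// performs remote writes; failures are deliberately non-fatal so that a
+// partially-cleared target never aborts profiler startup.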
+int
+clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
+{
+    uintptr_t current_interp = unwinder->interpreter_addr;
+    uintptr_t zero = 0;
+
+    while (current_interp != 0) {
+        // Get first thread in this interpreter
+        uintptr_t tstate_addr;
+        if (_Py_RemoteDebug_PagedReadRemoteMemory(
+                &unwinder->handle,
+                current_interp + unwinder->debug_offsets.interpreter_state.threads_head,
+                sizeof(void*),
+                &tstate_addr) < 0) {
+            // Non-fatal: just skip clearing
+            PyErr_Clear();
+            return 0;
+        }
+
+        // Iterate all threads in this interpreter
+        while (tstate_addr != 0) {
+            // Clear last_profiled_frame
+            uintptr_t lpf_addr = tstate_addr + unwinder->debug_offsets.thread_state.last_profiled_frame;
+            if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr,
+                                                  sizeof(uintptr_t), &zero) < 0) {
+                // Non-fatal: just continue
+                PyErr_Clear();
+            }
+
+            // Move to next thread
+            if (_Py_RemoteDebug_PagedReadRemoteMemory(
+                    &unwinder->handle,
+                    tstate_addr + unwinder->debug_offsets.thread_state.next,
+                    sizeof(void*),
+                    &tstate_addr) < 0) {
+                PyErr_Clear();
+                break;
+            }
+        }
+
+        // Move to next interpreter
+        if (_Py_RemoteDebug_PagedReadRemoteMemory(
+                &unwinder->handle,
+                current_interp + unwinder->debug_offsets.interpreter_state.next,
+                sizeof(void*),
+                &current_interp) < 0) {
+            PyErr_Clear();
+            break;
+        }
+    }
+
+    return 0;
+}
+
+// Fast path: check if we have a full cache hit (parent stack unchanged)
+// A "full hit" means current frame == last profiled frame, so we can reuse
+// cached parent frames. We always read the current frame from memory to get
+// updated line numbers (the line within a frame can change between samples).
+// Returns: 1 if full hit (frame_info populated with current frame + cached parents),
+//          0 if miss, -1 on error
+static int
+try_full_cache_hit(
+    RemoteUnwinderObject *unwinder,
+    uintptr_t frame_addr,
+    uintptr_t last_profiled_frame,
+    uint64_t thread_id,
+    PyObject *frame_info)
+{
+    if (!unwinder->frame_cache || last_profiled_frame == 0) {
+        return 0;
+    }
+    // Full hit only if current frame == last profiled frame
+    if (frame_addr != last_profiled_frame) {
+        return 0;
+    }
+
+    FrameCacheEntry *entry = frame_cache_find(unwinder, thread_id);
+    if (!entry || !entry->frame_list) {
+        return 0;
+    }
+
+    // Verify first address matches (sanity check)
+    if (entry->num_addrs == 0 || entry->addrs[0] != frame_addr) {
+        return 0;
+    }
+
+    // Always read the current frame from memory to get updated line number
+    PyObject *current_frame = NULL;
+    uintptr_t code_object_addr = 0;
+    uintptr_t previous_frame = 0;
+    int parse_result = parse_frame_object(unwinder, &current_frame, frame_addr,
+                                          &code_object_addr, &previous_frame);
+    if (parse_result < 0) {
+        return -1;
+    }
+
+    // Get cached parent frames first (before modifying frame_info)
+    Py_ssize_t cached_size = PyList_GET_SIZE(entry->frame_list);
+    PyObject *parent_slice = NULL;
+    if (cached_size > 1) {
+        parent_slice = PyList_GetSlice(entry->frame_list, 1, cached_size);
+        if (!parent_slice) {
+            Py_XDECREF(current_frame);
+            return -1;
+        }
+    }
+
+    // Now safe to modify frame_info - add current frame if valid
+    if (current_frame != NULL) {
+        if (PyList_Append(frame_info, current_frame) < 0) {
+            Py_DECREF(current_frame);
+            Py_XDECREF(parent_slice);
+            return -1;
+        }
+        Py_DECREF(current_frame);
+        STATS_ADD(unwinder, frames_read_from_memory, 1);
+    }
+
+    // Extend with cached parent frames
+    if (parent_slice) {
+        Py_ssize_t cur_size = PyList_GET_SIZE(frame_info);
+        int result = PyList_SetSlice(frame_info, cur_size, cur_size, parent_slice);
+        
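// (PyList_SetSlice with low == high == len(frame_info) appends the
+        // parent_slice items rather than replacing anything.)
+        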
Py_DECREF(parent_slice); + if (result < 0) { + return -1; + } + STATS_ADD(unwinder, frames_read_from_cache, cached_size - 1); + } + + STATS_INC(unwinder, frame_cache_hits); + return 1; +} + +// High-level helper: collect frames with cache optimization +// Returns complete frame_info list, handling all cache logic internally +int +collect_frames_with_cache( + RemoteUnwinderObject *unwinder, + uintptr_t frame_addr, + StackChunkList *chunks, + PyObject *frame_info, + uintptr_t gc_frame, + uintptr_t last_profiled_frame, + uint64_t thread_id) +{ + // Fast path: check for full cache hit first (no allocations needed) + int full_hit = try_full_cache_hit(unwinder, frame_addr, last_profiled_frame, + thread_id, frame_info); + if (full_hit != 0) { + return full_hit < 0 ? -1 : 0; // Either error or success + } + + uintptr_t addrs[FRAME_CACHE_MAX_FRAMES]; + Py_ssize_t num_addrs = 0; + Py_ssize_t frames_before = PyList_GET_SIZE(frame_info); + + int stopped_at_cached = 0; + if (process_frame_chain(unwinder, frame_addr, chunks, frame_info, 0, gc_frame, + last_profiled_frame, &stopped_at_cached, + addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) { + return -1; + } + + // Track frames read from memory (frames added by process_frame_chain) + STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before); + + // If stopped at cached frame, extend with cached continuation (both frames and addresses) + if (stopped_at_cached) { + Py_ssize_t frames_before_cache = PyList_GET_SIZE(frame_info); + int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id, last_profiled_frame, + frame_info, addrs, &num_addrs, + FRAME_CACHE_MAX_FRAMES); + if (cache_result < 0) { + return -1; + } + if (cache_result == 0) { + // Cache miss - continue walking from last_profiled_frame to get the rest + STATS_INC(unwinder, frame_cache_misses); + Py_ssize_t frames_before_walk = PyList_GET_SIZE(frame_info); + if (process_frame_chain(unwinder, last_profiled_frame, chunks, frame_info, 0, gc_frame, + 0, NULL, addrs, &num_addrs, FRAME_CACHE_MAX_FRAMES) < 0) { + return -1; + } + STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(frame_info) - frames_before_walk); + } else { + // Partial cache hit + STATS_INC(unwinder, frame_cache_partial_hits); + STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(frame_info) - frames_before_cache); + } + } else if (last_profiled_frame == 0) { + // No cache involvement (no last_profiled_frame or cache disabled) + STATS_INC(unwinder, frame_cache_misses); + } + + // Store in cache (frame_cache_store handles truncation if num_addrs > FRAME_CACHE_MAX_FRAMES) + if (frame_cache_store(unwinder, thread_id, frame_info, addrs, num_addrs) < 0) { + return -1; + } + + return 0; +} diff --git a/Modules/_remote_debugging/module.c b/Modules/_remote_debugging/module.c new file mode 100644 index 00000000000..123e4f5c4d7 --- /dev/null +++ b/Modules/_remote_debugging/module.c @@ -0,0 +1,1108 @@ +/****************************************************************************** + * Remote Debugging Module - Main Module Implementation + * + * This file contains the main module initialization, the RemoteUnwinder + * class implementation, and utility functions. 
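+ *
+ * Alongside the RemoteUnwinder type it defines the structseq result types
+ * (TaskInfo, FrameInfo, CoroInfo, ThreadInfo, InterpreterInfo, AwaitedInfo)
+ * that the unwinder APIs return.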
+ ******************************************************************************/ + +#include "_remote_debugging.h" +#include "clinic/module.c.h" + +/* ============================================================================ + * STRUCTSEQ TYPE DEFINITIONS + * ============================================================================ */ + +// TaskInfo structseq type +static PyStructSequence_Field TaskInfo_fields[] = { + {"task_id", "Task ID (memory address)"}, + {"task_name", "Task name"}, + {"coroutine_stack", "Coroutine call stack"}, + {"awaited_by", "Tasks awaiting this task"}, + {NULL} +}; + +PyStructSequence_Desc TaskInfo_desc = { + "_remote_debugging.TaskInfo", + "Information about an asyncio task", + TaskInfo_fields, + 4 +}; + +// FrameInfo structseq type +static PyStructSequence_Field FrameInfo_fields[] = { + {"filename", "Source code filename"}, + {"lineno", "Line number"}, + {"funcname", "Function name"}, + {NULL} +}; + +PyStructSequence_Desc FrameInfo_desc = { + "_remote_debugging.FrameInfo", + "Information about a frame", + FrameInfo_fields, + 3 +}; + +// CoroInfo structseq type +static PyStructSequence_Field CoroInfo_fields[] = { + {"call_stack", "Coroutine call stack"}, + {"task_name", "Task name"}, + {NULL} +}; + +PyStructSequence_Desc CoroInfo_desc = { + "_remote_debugging.CoroInfo", + "Information about a coroutine", + CoroInfo_fields, + 2 +}; + +// ThreadInfo structseq type +static PyStructSequence_Field ThreadInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"}, + {"frame_info", "Frame information"}, + {NULL} +}; + +PyStructSequence_Desc ThreadInfo_desc = { + "_remote_debugging.ThreadInfo", + "Information about a thread", + ThreadInfo_fields, + 3 +}; + +// InterpreterInfo structseq type +static PyStructSequence_Field InterpreterInfo_fields[] = { + {"interpreter_id", "Interpreter ID"}, + {"threads", "List of threads in this interpreter"}, + {NULL} +}; + +PyStructSequence_Desc InterpreterInfo_desc = { + "_remote_debugging.InterpreterInfo", + "Information about an interpreter", + InterpreterInfo_fields, + 2 +}; + +// AwaitedInfo structseq type +static PyStructSequence_Field AwaitedInfo_fields[] = { + {"thread_id", "Thread ID"}, + {"awaited_by", "List of tasks awaited by this thread"}, + {NULL} +}; + +PyStructSequence_Desc AwaitedInfo_desc = { + "_remote_debugging.AwaitedInfo", + "Information about what a thread is awaiting", + AwaitedInfo_fields, + 2 +}; + +/* ============================================================================ + * UTILITY FUNCTIONS + * ============================================================================ */ + +void +cached_code_metadata_destroy(void *ptr) +{ + CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr; + Py_DECREF(meta->func_name); + Py_DECREF(meta->file_name); + Py_DECREF(meta->linetable); + PyMem_RawFree(meta); +} + +RemoteDebuggingState * +RemoteDebugging_GetState(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (RemoteDebuggingState *)state; +} + +RemoteDebuggingState * +RemoteDebugging_GetStateFromType(PyTypeObject *type) +{ + PyObject *module = PyType_GetModule(type); + assert(module != NULL); + return RemoteDebugging_GetState(module); +} + +RemoteDebuggingState * +RemoteDebugging_GetStateFromObject(PyObject *obj) +{ + RemoteUnwinderObject *unwinder = (RemoteUnwinderObject *)obj; + if (unwinder->cached_state == NULL) { + unwinder->cached_state = RemoteDebugging_GetStateFromType(Py_TYPE(obj)); + } + 
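// Caching the state pointer on the unwinder lets later calls skip the
+    // PyType_GetModule() lookup entirely.
+    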
return unwinder->cached_state; +} + +int +RemoteDebugging_InitState(RemoteDebuggingState *st) +{ + return 0; +} + +int +is_prerelease_version(uint64_t version) +{ + return (version & 0xF0) != 0xF0; +} + +int +validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets) +{ + if (memcmp(debug_offsets->cookie, _Py_Debug_Cookie, sizeof(debug_offsets->cookie)) != 0) { + // The remote is probably running a Python version predating debug offsets. + PyErr_SetString( + PyExc_RuntimeError, + "Can't determine the Python version of the remote process"); + return -1; + } + + // Assume debug offsets could change from one pre-release version to another, + // or one minor version to another, but are stable across patch versions. + if (is_prerelease_version(Py_Version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach from a pre-release Python interpreter" + " to a process running a different Python version"); + return -1; + } + + if (is_prerelease_version(debug_offsets->version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't attach to a pre-release Python interpreter" + " from a process running a different Python version"); + return -1; + } + + unsigned int remote_major = (debug_offsets->version >> 24) & 0xFF; + unsigned int remote_minor = (debug_offsets->version >> 16) & 0xFF; + + if (PY_MAJOR_VERSION != remote_major || PY_MINOR_VERSION != remote_minor) { + PyErr_Format( + PyExc_RuntimeError, + "Can't attach from a Python %d.%d process to a Python %d.%d process", + PY_MAJOR_VERSION, PY_MINOR_VERSION, remote_major, remote_minor); + return -1; + } + + // The debug offsets differ between free threaded and non-free threaded builds. + if (_Py_Debug_Free_Threaded && !debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach from a free-threaded Python process" + " to a process running a non-free-threaded version"); + return -1; + } + + if (!_Py_Debug_Free_Threaded && debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot attach to a free-threaded Python process" + " from a process running a non-free-threaded version"); + return -1; + } + + return 0; +} + +/* ============================================================================ + * REMOTEUNWINDER CLASS IMPLEMENTATION + * ============================================================================ */ + +/*[clinic input] +module _remote_debugging +class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=12b4dce200381115]*/ + +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +_remote_debugging.RemoteUnwinder.__init__ + pid: int + * + all_threads: bool = False + only_active_thread: bool = False + mode: int = 0 + debug: bool = False + skip_non_matching_threads: bool = True + native: bool = False + gc: bool = False + cache_frames: bool = False + stats: bool = False + +Initialize a new RemoteUnwinder object for debugging a remote Python process. + +Args: + pid: Process ID of the target Python process to debug + all_threads: If True, initialize state for all threads in the process. + If False, only initialize for the main thread. + only_active_thread: If True, only sample the thread holding the GIL. + mode: Profiling mode: 0=WALL (wall-time), 1=CPU (cpu-time), 2=GIL (gil-time). + Cannot be used together with all_threads=True. 
+    debug: If True, chain exceptions to explain the sequence of events that
+        led to the exception.
+    skip_non_matching_threads: If True, skip threads that don't match the selected mode.
+        If False, include all threads regardless of mode.
+    native: If True, include artificial "<native>" frames to denote calls to
+        non-Python code.
+    gc: If True, include artificial "<GC>" frames to denote active garbage
+        collection.
+    cache_frames: If True, enable frame caching optimization to avoid re-reading
+        unchanged parent frames between samples.
+    stats: If True, collect statistics about cache hits, memory reads, etc.
+        Use get_stats() to retrieve the collected statistics.
+
+The RemoteUnwinder provides functionality to inspect and debug a running Python
+process, including examining thread states, stack frames and other runtime data.
+
+Raises:
+    PermissionError: If access to the target process is denied
+    OSError: If unable to attach to the target process or access its memory
+    RuntimeError: If unable to read debug information from the target process
+    ValueError: If both all_threads and only_active_thread are True
+[clinic start generated code]*/
+
+static int
+_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
+                                               int pid, int all_threads,
+                                               int only_active_thread,
+                                               int mode, int debug,
+                                               int skip_non_matching_threads,
+                                               int native, int gc,
+                                               int cache_frames, int stats)
+/*[clinic end generated code: output=b34ef8cce013c975 input=df2221ef114c3d6a]*/
+{
+    // Validate that all_threads and only_active_thread are not both True
+    if (all_threads && only_active_thread) {
+        PyErr_SetString(PyExc_ValueError,
+                        "all_threads and only_active_thread cannot both be True");
+        return -1;
+    }
+
+#ifdef Py_GIL_DISABLED
+    if (only_active_thread) {
+        PyErr_SetString(PyExc_ValueError,
+                        "only_active_thread is not supported in free-threaded builds");
+        return -1;
+    }
+#endif
+
+    self->native = native;
+    self->gc = gc;
+    self->cache_frames = cache_frames;
+    self->collect_stats = stats;
+    self->stale_invalidation_counter = 0;
+    self->debug = debug;
+    self->only_active_thread = only_active_thread;
+    self->mode = mode;
+    self->skip_non_matching_threads = skip_non_matching_threads;
+    self->cached_state = NULL;
+    self->frame_cache = NULL;
+    // Initialize stats to zero
+    memset(&self->stats, 0, sizeof(self->stats));
+    if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) {
+        set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle");
+        return -1;
+    }
+
+    self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle);
+    if (self->runtime_start_address == 0) {
+        set_exception_cause(self, PyExc_RuntimeError, "Failed to get Python runtime address");
+        return -1;
+    }
+
+    if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle,
+                                         &self->runtime_start_address,
+                                         &self->debug_offsets) < 0)
+    {
+        set_exception_cause(self, PyExc_RuntimeError, "Failed to read debug offsets");
+        return -1;
+    }
+
+    // Validate that the debug offsets are valid
+    if (validate_debug_offsets(&self->debug_offsets) == -1) {
+        set_exception_cause(self, PyExc_RuntimeError, "Invalid debug offsets found");
+        return -1;
+    }
+
+    // Try to read async debug offsets, but don't fail if they're not available
+    self->async_debug_offsets_available = 1;
+    if (read_async_debug(self) < 0) {
+        PyErr_Clear();
+        memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets));
+        self->async_debug_offsets_available = 0;
+    }
+
+    if (populate_initial_state_data(all_threads, self, self->runtime_start_address,
+                                    
&self->interpreter_addr ,&self->tstate_addr) < 0) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to populate initial state data"); + return -1; + } + + self->code_object_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + cached_code_metadata_destroy, + NULL + ); + if (self->code_object_cache == NULL) { + PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create code object cache"); + return -1; + } + +#ifdef Py_GIL_DISABLED + // Initialize TLBC cache + self->tlbc_generation = 0; + self->tlbc_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + tlbc_cache_entry_destroy, + NULL + ); + if (self->tlbc_cache == NULL) { + _Py_hashtable_destroy(self->code_object_cache); + PyErr_NoMemory(); + set_exception_cause(self, PyExc_MemoryError, "Failed to create TLBC cache"); + return -1; + } +#endif + +#if defined(__APPLE__) + self->thread_id_offset = 0; +#endif + +#ifdef MS_WINDOWS + self->win_process_buffer = NULL; + self->win_process_buffer_size = 0; +#endif + + if (cache_frames && frame_cache_init(self) < 0) { + return -1; + } + + // Clear stale last_profiled_frame values from previous profilers + // This prevents us from stopping frame walking early due to stale values + if (cache_frames) { + clear_last_profiled_frames(self); + } + + return 0; +} + +/*[clinic input] +@permit_long_docstring_body +@critical_section +_remote_debugging.RemoteUnwinder.get_stack_trace + +Returns stack traces for all interpreters and threads in process. + +Each element in the returned list is a tuple of (interpreter_id, thread_list), where: +- interpreter_id is the interpreter identifier +- thread_list is a list of tuples (thread_id, frame_list) for threads in that interpreter + - thread_id is the OS thread identifier + - frame_list is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for that thread, ordered from most recent to oldest + +The threads returned depend on the initialization parameters: +- If only_active_thread was True: returns only the thread holding the GIL across all interpreters +- If all_threads was True: returns all threads across all interpreters +- Otherwise: returns only the main thread of each interpreter + +Example: + [ + (0, [ # Main interpreter + (1234, [ + ('process_data', 'worker.py', 127), + ('run_worker', 'worker.py', 45), + ('main', 'app.py', 23) + ]), + (1235, [ + ('handle_request', 'server.py', 89), + ('serve_forever', 'server.py', 52) + ]) + ]), + (1, [ # Sub-interpreter + (1236, [ + ('sub_worker', 'sub.py', 15) + ]) + ]) + ] + +Raises: + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=666192b90c69d567 input=bcff01c73cccc1c0]*/ +{ + STATS_INC(self, total_samples); + + PyObject* result = PyList_New(0); + if (!result) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create stack trace result list"); + return NULL; + } + + // Iterate over all interpreters + uintptr_t current_interpreter = self->interpreter_addr; + while 
(current_interpreter != 0) {
+        // Read interpreter state to get the interpreter ID
+        char interp_state_buffer[INTERP_STATE_BUFFER_SIZE];
+        if (_Py_RemoteDebug_PagedReadRemoteMemory(
+                &self->handle,
+                current_interpreter,
+                INTERP_STATE_BUFFER_SIZE,
+                interp_state_buffer) < 0) {
+            set_exception_cause(self, PyExc_RuntimeError, "Failed to read interpreter state buffer");
+            Py_CLEAR(result);
+            goto exit;
+        }
+
+        uintptr_t gc_frame = 0;
+        if (self->gc) {
+            gc_frame = GET_MEMBER(uintptr_t, interp_state_buffer,
+                                  self->debug_offsets.interpreter_state.gc
+                                  + self->debug_offsets.gc.frame);
+        }
+
+        int64_t interpreter_id = GET_MEMBER(int64_t, interp_state_buffer,
+                                            self->debug_offsets.interpreter_state.id);
+
+        // Get code object generation from buffer
+        uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer,
+                                                     self->debug_offsets.interpreter_state.code_object_generation);
+
+        if (code_object_generation != self->code_object_generation) {
+            self->code_object_generation = code_object_generation;
+            _Py_hashtable_clear(self->code_object_cache);
+        }
+
+#ifdef Py_GIL_DISABLED
+        // Check TLBC generation and invalidate cache if needed
+        uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer,
+                                                      self->debug_offsets.interpreter_state.tlbc_generation);
+        if (current_tlbc_generation != self->tlbc_generation) {
+            self->tlbc_generation = current_tlbc_generation;
+            _Py_hashtable_clear(self->tlbc_cache);
+        }
+#endif
+
+        // Create a list to hold threads for this interpreter
+        PyObject *interpreter_threads = PyList_New(0);
+        if (!interpreter_threads) {
+            set_exception_cause(self, PyExc_MemoryError, "Failed to create interpreter threads list");
+            Py_CLEAR(result);
+            goto exit;
+        }
+
+        // Get the GIL holder for this interpreter (needed for GIL_WAIT logic)
+        uintptr_t gil_holder_tstate = 0;
+        int gil_locked = GET_MEMBER(int, interp_state_buffer,
+                                    self->debug_offsets.interpreter_state.gil_runtime_state_locked);
+        if (gil_locked) {
+            gil_holder_tstate = (uintptr_t)GET_MEMBER(PyThreadState*, interp_state_buffer,
+                                                      self->debug_offsets.interpreter_state.gil_runtime_state_holder);
+        }
+
+        uintptr_t current_tstate;
+        if (self->only_active_thread) {
+            // Find the GIL holder for THIS interpreter
+            if (!gil_locked) {
+                // This interpreter's GIL is not locked, skip it
+                Py_DECREF(interpreter_threads);
+                goto next_interpreter;
+            }
+
+            current_tstate = gil_holder_tstate;
+        } else if (self->tstate_addr == 0) {
+            // Get all threads for this interpreter
+            current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
+                                        self->debug_offsets.interpreter_state.threads_head);
+        } else {
+            // Target specific thread (only process first interpreter)
+            current_tstate = self->tstate_addr;
+        }
+
+        while (current_tstate != 0) {
+            PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate,
+                                                           gil_holder_tstate,
+                                                           gc_frame);
+            if (!frame_info) {
+                // Check if this was an intentional skip due to mode-based filtering
+                if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) {
+                    // Thread was skipped due to mode filtering, continue to next thread
+                    continue;
+                }
+                // This was an actual error
+                Py_DECREF(interpreter_threads);
+                set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread");
+                Py_CLEAR(result);
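+                // The exit path still clears the remote read cache even
+                // though result has been cleared after this error.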
+ goto exit; + } + Py_DECREF(frame_info); + + // If targeting specific thread or only active thread, process just one + if (self->tstate_addr || self->only_active_thread) { + break; + } + } + + // Create the InterpreterInfo StructSequence + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)self); + PyObject *interpreter_info = PyStructSequence_New(state->InterpreterInfo_Type); + if (!interpreter_info) { + Py_DECREF(interpreter_threads); + set_exception_cause(self, PyExc_MemoryError, "Failed to create InterpreterInfo"); + Py_CLEAR(result); + goto exit; + } + + PyObject *interp_id = PyLong_FromLongLong(interpreter_id); + if (!interp_id) { + Py_DECREF(interpreter_threads); + Py_DECREF(interpreter_info); + set_exception_cause(self, PyExc_MemoryError, "Failed to create interpreter ID"); + Py_CLEAR(result); + goto exit; + } + + PyStructSequence_SetItem(interpreter_info, 0, interp_id); // steals reference + PyStructSequence_SetItem(interpreter_info, 1, interpreter_threads); // steals reference + + // Add this interpreter to the result list + if (PyList_Append(result, interpreter_info) == -1) { + Py_DECREF(interpreter_info); + set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter info"); + Py_CLEAR(result); + goto exit; + } + Py_DECREF(interpreter_info); + +next_interpreter: + + // Get the next interpreter address + current_interpreter = GET_MEMBER(uintptr_t, interp_state_buffer, + self->debug_offsets.interpreter_state.next); + + // If we're targeting a specific thread, stop after first interpreter + if (self->tstate_addr != 0) { + break; + } + } + +exit: + // Invalidate cache entries for threads not seen in this sample. + // Only do this every 1024 iterations to avoid performance overhead. + if (self->cache_frames && result) { + if (++self->stale_invalidation_counter >= 1024) { + self->stale_invalidation_counter = 0; + frame_cache_invalidate_stale(self, result); + } + } + _Py_RemoteDebug_ClearCache(&self->handle); + return result; +} + +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +@critical_section +_remote_debugging.RemoteUnwinder.get_all_awaited_by + +Get all tasks and their awaited_by relationships from the remote process. + +This provides a tree structure showing which tasks are waiting for other tasks. + +For each task, returns: +1. The call stack frames leading to where the task is currently executing +2. The name of the task +3. A list of tasks that this task is waiting for, with their own frames/names/etc + +Returns a list of [frames, task_name, subtasks] where: +- frames: List of (func_name, filename, lineno) showing the call stack +- task_name: String identifier for the task +- subtasks: List of tasks being awaited by this task, in same format + +Raises: + RuntimeError: If AsyncioDebug section is not available in the remote process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output: +[ + # Task c2_root waiting for two subtasks + [ + # Call stack of c2_root + [("c5", "script.py", 10), ("c4", "script.py", 14)], + "c2_root", + [ + # First subtask (sub_main_2) and what it's waiting for + [ + [("c1", "script.py", 23)], + "sub_main_2", + [...] + ], + # Second subtask and its waiters + [...] 
+ ] + ] +] +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6a49cd345e8aec53 input=307f754cbe38250c]*/ +{ + if (ensure_async_debug_offsets(self) < 0) { + return NULL; + } + + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create awaited_by result list"); + goto result_err; + } + + // Process all threads + if (iterate_threads(self, process_thread_for_awaited_by, result) < 0) { + goto result_err; + } + + uintptr_t head_addr = self->interpreter_addr + + (uintptr_t)self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; + + // On top of a per-thread task lists used by default by asyncio to avoid + // contention, there is also a fallback per-interpreter list of tasks; + // any tasks still pending when a thread is destroyed will be moved to the + // per-interpreter task list. It's unlikely we'll find anything here, but + // interesting for debugging. + if (append_awaited_by(self, 0, head_addr, result)) + { + set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter awaited_by in get_all_awaited_by"); + goto result_err; + } + + _Py_RemoteDebug_ClearCache(&self->handle); + return result; + +result_err: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} + +/*[clinic input] +@permit_long_summary +@permit_long_docstring_body +@critical_section +_remote_debugging.RemoteUnwinder.get_async_stack_trace + +Get the currently running async tasks and their dependency graphs from the remote process. + +This returns information about running tasks and all tasks that are waiting for them, +forming a complete dependency graph for each thread's active task. + +For each thread with a running task, returns the running task plus all tasks that +transitively depend on it (tasks waiting for the running task, tasks waiting for +those tasks, etc.). 
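+
+A minimal usage sketch (assumes permission to attach to the target pid):
+
+    unwinder = RemoteUnwinder(pid)
+    for thread_id, tasks in unwinder.get_async_stack_trace():
+        ...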
+ +Returns a list of per-thread results, where each thread result contains: +- Thread ID +- List of task information for the running task and all its waiters + +Each task info contains: +- Task ID (memory address) +- Task name +- Call stack frames: List of (func_name, filename, lineno) +- List of tasks waiting for this task (recursive structure) + +Raises: + RuntimeError: If AsyncioDebug section is not available in the target process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output (similar structure to get_all_awaited_by but only for running tasks): +[ + # Thread 140234 results + (140234, [ + # Running task and its complete waiter dependency graph + (4345585712, 'main_task', + [("run_server", "server.py", 127), ("main", "app.py", 23)], + [ + # Tasks waiting for main_task + (4345585800, 'worker_1', [...], [...]), + (4345585900, 'worker_2', [...], [...]) + ]) + ]) +] + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6433d52b55e87bbe input=6129b7d509a887c9]*/ +{ + if (ensure_async_debug_offsets(self) < 0) { + return NULL; + } + + PyObject *result = PyList_New(0); + if (result == NULL) { + set_exception_cause(self, PyExc_MemoryError, "Failed to create result list in get_async_stack_trace"); + return NULL; + } + + // Process all threads + if (iterate_threads(self, process_thread_for_async_stack_trace, result) < 0) { + goto result_err; + } + + _Py_RemoteDebug_ClearCache(&self->handle); + return result; +result_err: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} + +/*[clinic input] +@permit_long_docstring_body +@critical_section +_remote_debugging.RemoteUnwinder.get_stats + +Get collected statistics about profiling performance. + +Returns a dictionary containing statistics about cache performance, +memory reads, and other profiling metrics. Only available if the +RemoteUnwinder was created with stats=True. + +Returns: + dict: A dictionary containing: + - total_samples: Total number of get_stack_trace calls + - frame_cache_hits: Full cache hits (entire stack unchanged) + - frame_cache_misses: Cache misses requiring full walk + - frame_cache_partial_hits: Partial hits (stopped at cached frame) + - frames_read_from_cache: Total frames retrieved from cache + - frames_read_from_memory: Total frames read from remote memory + - memory_reads: Total remote memory read operations + - memory_bytes_read: Total bytes read from remote memory + - code_object_cache_hits: Code object cache hits + - code_object_cache_misses: Code object cache misses + - stale_cache_invalidations: Times stale cache entries were cleared + - frame_cache_hit_rate: Percentage of samples that hit the cache + - code_object_cache_hit_rate: Percentage of code object lookups that hit cache + +Raises: + RuntimeError: If stats collection was not enabled (stats=False) +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stats_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=21e36477122be2a0 input=75fef4134c12a8c9]*/ +{ + if (!self->collect_stats) { + PyErr_SetString(PyExc_RuntimeError, + "Statistics collection was not enabled. 
" + "Create RemoteUnwinder with stats=True to collect statistics."); + return NULL; + } + + PyObject *result = PyDict_New(); + if (!result) { + return NULL; + } + +#define ADD_STAT(name) do { \ + PyObject *val = PyLong_FromUnsignedLongLong(self->stats.name); \ + if (!val || PyDict_SetItemString(result, #name, val) < 0) { \ + Py_XDECREF(val); \ + Py_DECREF(result); \ + return NULL; \ + } \ + Py_DECREF(val); \ +} while(0) + + ADD_STAT(total_samples); + ADD_STAT(frame_cache_hits); + ADD_STAT(frame_cache_misses); + ADD_STAT(frame_cache_partial_hits); + ADD_STAT(frames_read_from_cache); + ADD_STAT(frames_read_from_memory); + ADD_STAT(memory_reads); + ADD_STAT(memory_bytes_read); + ADD_STAT(code_object_cache_hits); + ADD_STAT(code_object_cache_misses); + ADD_STAT(stale_cache_invalidations); + +#undef ADD_STAT + + // Calculate and add derived statistics + // Hit rate is calculated as (hits + partial_hits) / total_cache_lookups + double frame_cache_hit_rate = 0.0; + uint64_t total_cache_lookups = self->stats.frame_cache_hits + self->stats.frame_cache_partial_hits + self->stats.frame_cache_misses; + if (total_cache_lookups > 0) { + frame_cache_hit_rate = 100.0 * (double)(self->stats.frame_cache_hits + self->stats.frame_cache_partial_hits) + / (double)total_cache_lookups; + } + PyObject *hit_rate = PyFloat_FromDouble(frame_cache_hit_rate); + if (!hit_rate || PyDict_SetItemString(result, "frame_cache_hit_rate", hit_rate) < 0) { + Py_XDECREF(hit_rate); + Py_DECREF(result); + return NULL; + } + Py_DECREF(hit_rate); + + double code_object_hit_rate = 0.0; + uint64_t total_code_lookups = self->stats.code_object_cache_hits + self->stats.code_object_cache_misses; + if (total_code_lookups > 0) { + code_object_hit_rate = 100.0 * (double)self->stats.code_object_cache_hits / (double)total_code_lookups; + } + PyObject *code_hit_rate = PyFloat_FromDouble(code_object_hit_rate); + if (!code_hit_rate || PyDict_SetItemString(result, "code_object_cache_hit_rate", code_hit_rate) < 0) { + Py_XDECREF(code_hit_rate); + Py_DECREF(result); + return NULL; + } + Py_DECREF(code_hit_rate); + + return result; +} + +static PyMethodDef RemoteUnwinder_methods[] = { + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STATS_METHODDEF + {NULL, NULL} +}; + +static void +RemoteUnwinder_dealloc(PyObject *op) +{ + RemoteUnwinderObject *self = RemoteUnwinder_CAST(op); + PyTypeObject *tp = Py_TYPE(self); + if (self->code_object_cache) { + _Py_hashtable_destroy(self->code_object_cache); + } +#ifdef MS_WINDOWS + if(self->win_process_buffer != NULL) { + PyMem_Free(self->win_process_buffer); + } +#endif + +#ifdef Py_GIL_DISABLED + if (self->tlbc_cache) { + _Py_hashtable_destroy(self->tlbc_cache); + } +#endif + if (self->handle.pid != 0) { + _Py_RemoteDebug_ClearCache(&self->handle); + _Py_RemoteDebug_CleanupProcHandle(&self->handle); + } + frame_cache_cleanup(self); + PyObject_Del(self); + Py_DECREF(tp); +} + +static PyType_Slot RemoteUnwinder_slots[] = { + {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."}, + {Py_tp_methods, RemoteUnwinder_methods}, + {Py_tp_init, _remote_debugging_RemoteUnwinder___init__}, + {Py_tp_dealloc, RemoteUnwinder_dealloc}, + {0, NULL} +}; + +static PyType_Spec RemoteUnwinder_spec = { + .name = "_remote_debugging.RemoteUnwinder", + .basicsize = sizeof(RemoteUnwinderObject), + .flags = ( + Py_TPFLAGS_DEFAULT 
+ | Py_TPFLAGS_IMMUTABLETYPE + ), + .slots = RemoteUnwinder_slots, +}; + +/* ============================================================================ + * MODULE INITIALIZATION + * ============================================================================ */ + +static int +_remote_debugging_exec(PyObject *m) +{ + RemoteDebuggingState *st = RemoteDebugging_GetState(m); +#define CREATE_TYPE(mod, type, spec) \ + do { \ + type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \ + if (type == NULL) { \ + return -1; \ + } \ + } while (0) + + CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec); + + if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { + return -1; + } + + // Initialize structseq types + st->TaskInfo_Type = PyStructSequence_NewType(&TaskInfo_desc); + if (st->TaskInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->TaskInfo_Type) < 0) { + return -1; + } + + st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); + if (st->FrameInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->FrameInfo_Type) < 0) { + return -1; + } + + st->CoroInfo_Type = PyStructSequence_NewType(&CoroInfo_desc); + if (st->CoroInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->CoroInfo_Type) < 0) { + return -1; + } + + st->ThreadInfo_Type = PyStructSequence_NewType(&ThreadInfo_desc); + if (st->ThreadInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->ThreadInfo_Type) < 0) { + return -1; + } + + st->InterpreterInfo_Type = PyStructSequence_NewType(&InterpreterInfo_desc); + if (st->InterpreterInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->InterpreterInfo_Type) < 0) { + return -1; + } + + st->AwaitedInfo_Type = PyStructSequence_NewType(&AwaitedInfo_desc); + if (st->AwaitedInfo_Type == NULL) { + return -1; + } + if (PyModule_AddType(m, st->AwaitedInfo_Type) < 0) { + return -1; + } +#ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); +#endif + int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); + if (rc < 0) { + return -1; + } + + // Add thread status flag constants + if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) { + return -1; + } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) { + return -1; + } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) { + return -1; + } + if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) { + return -1; + } + + if (RemoteDebugging_InitState(st) < 0) { + return -1; + } + return 0; +} + +static int +remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_VISIT(state->RemoteDebugging_Type); + Py_VISIT(state->TaskInfo_Type); + Py_VISIT(state->FrameInfo_Type); + Py_VISIT(state->CoroInfo_Type); + Py_VISIT(state->ThreadInfo_Type); + Py_VISIT(state->InterpreterInfo_Type); + Py_VISIT(state->AwaitedInfo_Type); + return 0; +} + +static int +remote_debugging_clear(PyObject *mod) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_CLEAR(state->RemoteDebugging_Type); + Py_CLEAR(state->TaskInfo_Type); + Py_CLEAR(state->FrameInfo_Type); + Py_CLEAR(state->CoroInfo_Type); + Py_CLEAR(state->ThreadInfo_Type); + Py_CLEAR(state->InterpreterInfo_Type); + Py_CLEAR(state->AwaitedInfo_Type); + return 0; +} + +static void +remote_debugging_free(void *mod) +{ + 
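// m_free delegates to the clear handler so both teardown paths drop
+    // the cached type references the same way.
+    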
(void)remote_debugging_clear((PyObject *)mod); +} + +static PyModuleDef_Slot remote_debugging_slots[] = { + {Py_mod_exec, _remote_debugging_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static PyMethodDef remote_debugging_methods[] = { + {NULL, NULL, 0, NULL}, +}; + +static struct PyModuleDef remote_debugging_module = { + PyModuleDef_HEAD_INIT, + .m_name = "_remote_debugging", + .m_size = sizeof(RemoteDebuggingState), + .m_methods = remote_debugging_methods, + .m_slots = remote_debugging_slots, + .m_traverse = remote_debugging_traverse, + .m_clear = remote_debugging_clear, + .m_free = remote_debugging_free, +}; + +PyMODINIT_FUNC +PyInit__remote_debugging(void) +{ + return PyModuleDef_Init(&remote_debugging_module); +} diff --git a/Modules/_remote_debugging/object_reading.c b/Modules/_remote_debugging/object_reading.c new file mode 100644 index 00000000000..2f465ca0cac --- /dev/null +++ b/Modules/_remote_debugging/object_reading.c @@ -0,0 +1,247 @@ +/****************************************************************************** + * Remote Debugging Module - Object Reading Functions + * + * This file contains functions for reading Python objects from remote + * process memory, including strings, bytes, and integers. + ******************************************************************************/ + +#include "_remote_debugging.h" + +/* ============================================================================ + * MEMORY READING FUNCTIONS + * ============================================================================ */ + +#define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \ +int \ +read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \ +{ \ + int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \ + if (res < 0) { \ + set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \ + return -1; \ + } \ + return 0; \ +} + +DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory") +DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory") +DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory") + +int +read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) +{ + if (read_ptr(unwinder, address, ptr_addr)) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer"); + return -1; + } + *ptr_addr &= ~Py_TAG_BITS; + return 0; +} + +/* ============================================================================ + * PYTHON OBJECT READING FUNCTIONS + * ============================================================================ */ + +PyObject * +read_py_str( + RemoteUnwinderObject *unwinder, + uintptr_t address, + Py_ssize_t max_len +) { + PyObject *result = NULL; + char *buf = NULL; + + // Read the entire PyUnicodeObject at once + char unicode_obj[SIZEOF_UNICODE_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_UNICODE_OBJ, + unicode_obj + ); + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject"); + goto err; + } + + Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid 
string length in remote Unicode object"); + return NULL; + } + + buf = (char *)PyMem_RawMalloc(len+1); + if (buf == NULL) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for string reading"); + return NULL; + } + + size_t offset = (size_t)unwinder->debug_offsets.unicode_object.asciiobject_size; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory"); + goto err; + } + buf[len] = '\0'; + + result = PyUnicode_FromStringAndSize(buf, len); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyUnicode from remote string data"); + goto err; + } + + PyMem_RawFree(buf); + assert(result != NULL); + return result; + +err: + if (buf != NULL) { + PyMem_RawFree(buf); + } + return NULL; +} + +PyObject * +read_py_bytes( + RemoteUnwinderObject *unwinder, + uintptr_t address, + Py_ssize_t max_len +) { + PyObject *result = NULL; + char *buf = NULL; + + // Read the entire PyBytesObject at once + char bytes_obj[SIZEOF_BYTES_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_BYTES_OBJ, + bytes_obj + ); + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject"); + goto err; + } + + Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid bytes length (%zd) at 0x%lx", len, address); + set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object"); + return NULL; + } + + buf = (char *)PyMem_RawMalloc(len+1); + if (buf == NULL) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading"); + return NULL; + } + + size_t offset = (size_t)unwinder->debug_offsets.bytes_object.ob_sval; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); + if (res < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory"); + goto err; + } + buf[len] = '\0'; + + result = PyBytes_FromStringAndSize(buf, len); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data"); + goto err; + } + + PyMem_RawFree(buf); + assert(result != NULL); + return result; + +err: + if (buf != NULL) { + PyMem_RawFree(buf); + } + return NULL; +} + +long +read_py_long( + RemoteUnwinderObject *unwinder, + uintptr_t address +) +{ + unsigned int shift = PYLONG_BITS_IN_DIGIT; + + // Read the entire PyLongObject at once + char long_obj[SIZEOF_LONG_OBJ]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + (size_t)unwinder->debug_offsets.long_object.size, + long_obj); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject"); + return -1; + } + + uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); + int negative = (lv_tag & 3) == 2; + Py_ssize_t size = lv_tag >> 3; + + if (size == 0) { + return 0; + } + + // If the long object has inline digits, use them directly + digit *digits; + if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + // For small integers, digits are inline in the long_value.ob_digit array + digits = (digit 
*)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); + return -1; + } + memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + } else { + // For larger integers, we need to read the digits separately + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong"); + return -1; + } + + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address + (uintptr_t)unwinder->debug_offsets.long_object.ob_digit, + sizeof(digit) * size, + digits + ); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory"); + goto error; + } + } + + long long value = 0; + + // In theory this can overflow, but because of llvm/llvm-project#16778 + // we can't use __builtin_mul_overflow because it fails to link with + // __muloti4 on aarch64. In practice this is fine because all we're + // testing here are task numbers that would fit in a single byte. + for (Py_ssize_t i = 0; i < size; ++i) { + long long factor = digits[i] * (1UL << (Py_ssize_t)(shift * i)); + value += factor; + } + PyMem_RawFree(digits); + if (negative) { + value *= -1; + } + return (long)value; +error: + PyMem_RawFree(digits); + return -1; +} diff --git a/Modules/_remote_debugging/threads.c b/Modules/_remote_debugging/threads.c new file mode 100644 index 00000000000..774338f9dc2 --- /dev/null +++ b/Modules/_remote_debugging/threads.c @@ -0,0 +1,457 @@ +/****************************************************************************** + * Remote Debugging Module - Thread Functions + * + * This file contains functions for iterating threads and determining + * thread status in remote process memory. 
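+ *
+ * Thread status comes from platform facilities: /proc/<pid>/task/<tid>/stat
+ * on Linux, proc_pidinfo() on macOS, and NtQuerySystemInformation() on
+ * Windows; platforms without a known source report THREAD_STATE_UNKNOWN.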
+ ******************************************************************************/ + +#include "_remote_debugging.h" + +#ifndef MS_WINDOWS +#include +#endif + +/* ============================================================================ + * THREAD ITERATION FUNCTIONS + * ============================================================================ */ + +int +iterate_threads( + RemoteUnwinderObject *unwinder, + thread_processor_func processor, + void *context +) { + uintptr_t thread_state_addr; + unsigned long tid = 0; + + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + unwinder->interpreter_addr + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state"); + return -1; + } + + while (thread_state_addr != 0) { + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.native_thread_id, + sizeof(tid), + &tid)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread ID"); + return -1; + } + + // Call the processor function for this thread + if (processor(unwinder, thread_state_addr, tid, context) < 0) { + return -1; + } + + // Move to next thread + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next thread state"); + return -1; + } + } + + return 0; +} + +/* ============================================================================ + * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS + * ============================================================================ */ + +int +populate_initial_state_data( + int all_threads, + RemoteUnwinderObject *unwinder, + uintptr_t runtime_start_address, + uintptr_t *interpreter_state, + uintptr_t *tstate +) { + uintptr_t interpreter_state_list_head = + (uintptr_t)unwinder->debug_offsets.runtime_state.interpreters_head; + + uintptr_t address_of_interpreter_state; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + runtime_start_address + interpreter_state_list_head, + sizeof(void*), + &address_of_interpreter_state); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state address"); + return -1; + } + + if (address_of_interpreter_state == 0) { + PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL"); + return -1; + } + + *interpreter_state = address_of_interpreter_state; + + if (all_threads) { + *tstate = 0; + return 0; + } + + uintptr_t address_of_thread = address_of_interpreter_state + + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main; + + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address_of_thread, + sizeof(void*), + tstate) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state address"); + return -1; + } + + return 0; +} + +int +find_running_frame( + RemoteUnwinderObject *unwinder, + uintptr_t address_of_thread, + uintptr_t *frame +) { + if ((void*)address_of_thread != NULL) { + int err = read_ptr( + unwinder, + address_of_thread + (uintptr_t)unwinder->debug_offsets.thread_state.current_frame, + frame); + if (err) { + 
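// read_ptr() has already set an exception here; set_exception_cause()
+            // only layers frame-specific context on top of it.
+            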
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read current frame pointer"); + return -1; + } + return 0; + } + + *frame = (uintptr_t)NULL; + return 0; +} + +/* ============================================================================ + * THREAD STATUS FUNCTIONS + * ============================================================================ */ + +int +get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id) { +#if defined(__APPLE__) && TARGET_OS_OSX + if (unwinder->thread_id_offset == 0) { + uint64_t *tids = (uint64_t *)PyMem_Malloc(MAX_NATIVE_THREADS * sizeof(uint64_t)); + if (!tids) { + PyErr_NoMemory(); + return -1; + } + int n = proc_pidinfo(unwinder->handle.pid, PROC_PIDLISTTHREADS, 0, tids, MAX_NATIVE_THREADS * sizeof(uint64_t)) / sizeof(uint64_t); + if (n <= 0) { + PyMem_Free(tids); + return THREAD_STATE_UNKNOWN; + } + uint64_t min_offset = UINT64_MAX; + for (int i = 0; i < n; i++) { + uint64_t offset = tids[i] - pthread_id; + if (offset < min_offset) { + min_offset = offset; + } + } + unwinder->thread_id_offset = min_offset; + PyMem_Free(tids); + } + struct proc_threadinfo ti; + uint64_t tid_with_offset = pthread_id + unwinder->thread_id_offset; + if (proc_pidinfo(unwinder->handle.pid, PROC_PIDTHREADINFO, tid_with_offset, &ti, sizeof(ti)) != sizeof(ti)) { + return THREAD_STATE_UNKNOWN; + } + if (ti.pth_run_state == TH_STATE_RUNNING) { + return THREAD_STATE_RUNNING; + } + return THREAD_STATE_IDLE; +#elif defined(__linux__) + char stat_path[256]; + char buffer[2048] = ""; + + snprintf(stat_path, sizeof(stat_path), "/proc/%d/task/%lu/stat", unwinder->handle.pid, tid); + + int fd = open(stat_path, O_RDONLY); + if (fd == -1) { + return THREAD_STATE_UNKNOWN; + } + + if (read(fd, buffer, 2047) == 0) { + close(fd); + return THREAD_STATE_UNKNOWN; + } + close(fd); + + char *p = strchr(buffer, ')'); + if (!p) { + return THREAD_STATE_UNKNOWN; + } + + p += 2; // Skip ") " + if (*p == ' ') { + p++; + } + + switch (*p) { + case 'R': // Running + return THREAD_STATE_RUNNING; + case 'S': // Interruptible sleep + case 'D': // Uninterruptible sleep + case 'T': // Stopped + case 'Z': // Zombie + case 'I': // Idle kernel thread + return THREAD_STATE_IDLE; + default: + return THREAD_STATE_UNKNOWN; + } +#elif defined(MS_WINDOWS) + ULONG n; + NTSTATUS status = NtQuerySystemInformation( + SystemProcessInformation, + unwinder->win_process_buffer, + unwinder->win_process_buffer_size, + &n + ); + if (status == STATUS_INFO_LENGTH_MISMATCH) { + // Buffer was too small so we reallocate a larger one and try again. + unwinder->win_process_buffer_size = n; + PVOID new_buffer = PyMem_Realloc(unwinder->win_process_buffer, n); + if (!new_buffer) { + return -1; + } + unwinder->win_process_buffer = new_buffer; + return get_thread_status(unwinder, tid, pthread_id); + } + if (status != STATUS_SUCCESS) { + return -1; + } + + SYSTEM_PROCESS_INFORMATION *pi = (SYSTEM_PROCESS_INFORMATION *)unwinder->win_process_buffer; + while ((ULONG)(ULONG_PTR)pi->UniqueProcessId != unwinder->handle.pid) { + if (pi->NextEntryOffset == 0) { + // We didn't find the process + return -1; + } + pi = (SYSTEM_PROCESS_INFORMATION *)(((BYTE *)pi) + pi->NextEntryOffset); + } + + SYSTEM_THREAD_INFORMATION *ti = (SYSTEM_THREAD_INFORMATION *)((char *)pi + sizeof(SYSTEM_PROCESS_INFORMATION)); + for (size_t i = 0; i < pi->NumberOfThreads; i++, ti++) { + if (ti->ClientId.UniqueThread == (HANDLE)tid) { + return ti->ThreadState != WIN32_THREADSTATE_RUNNING ? 
THREAD_STATE_IDLE : THREAD_STATE_RUNNING; + } + } + + return -1; +#else + return THREAD_STATE_UNKNOWN; +#endif +} + +/* ============================================================================ + * STACK UNWINDING FUNCTIONS + * ============================================================================ */ + +typedef struct { + unsigned int initialized:1; + unsigned int bound:1; + unsigned int unbound:1; + unsigned int bound_gilstate:1; + unsigned int active:1; + unsigned int finalizing:1; + unsigned int cleared:1; + unsigned int finalized:1; + unsigned int :24; +} _thread_status; + +PyObject* +unwind_stack_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t *current_tstate, + uintptr_t gil_holder_tstate, + uintptr_t gc_frame +) { + PyObject *frame_info = NULL; + PyObject *thread_id = NULL; + PyObject *result = NULL; + StackChunkList chunks = {0}; + + char ts[SIZEOF_THREAD_STATE]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, *current_tstate, (size_t)unwinder->debug_offsets.thread_state.size, ts); + if (bytes_read < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread state"); + goto error; + } + STATS_INC(unwinder, memory_reads); + STATS_ADD(unwinder, memory_bytes_read, unwinder->debug_offsets.thread_state.size); + + long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); + + // Read GC collecting state from the interpreter (before any skip checks) + uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp); + + // Read the GC runtime state from the interpreter state + uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc; + char gc_state[SIZEOF_GC_RUNTIME_STATE]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state"); + goto error; + } + STATS_INC(unwinder, memory_reads); + STATS_ADD(unwinder, memory_bytes_read, unwinder->debug_offsets.gc.size); + + // Calculate thread status using flags (always) + int status_flags = 0; + + // Check GIL status + int has_gil = 0; + int gil_requested = 0; +#ifdef Py_GIL_DISABLED + int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; + has_gil = active; +#else + // Read holds_gil directly from thread state + has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil); + + // Check if thread is actively requesting the GIL + if (unwinder->debug_offsets.thread_state.gil_requested != 0) { + gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested); + } + + // Set GIL_REQUESTED flag if thread is waiting + if (!has_gil && gil_requested) { + status_flags |= THREAD_STATUS_GIL_REQUESTED; + } +#endif + if (has_gil) { + status_flags |= THREAD_STATUS_HAS_GIL; + // gh-142207 for remote debugging. 
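+ // A thread that holds the GIL cannot simultaneously be waiting for
+ // it, so any stale "requested" reading is discarded here. Consumers
+ // can decode the reported bitfield along these lines (hypothetical
+ // sketch):
+ //     int has_gil = status_flags & THREAD_STATUS_HAS_GIL;
+ //     int waiting = status_flags & THREAD_STATUS_GIL_REQUESTED;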
+ gil_requested = 0; + } + + // Check CPU status + long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); + + // Optimization: only check CPU status if needed by mode because it's expensive + int cpu_status = -1; + if (unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_ALL) { + cpu_status = get_thread_status(unwinder, tid, pthread_id); + } + + if (cpu_status == -1) { + status_flags |= THREAD_STATUS_UNKNOWN; + } else if (cpu_status == THREAD_STATE_RUNNING) { + status_flags |= THREAD_STATUS_ON_CPU; + } + + // Check if we should skip this thread based on mode + int should_skip = 0; + if (unwinder->skip_non_matching_threads) { + if (unwinder->mode == PROFILING_MODE_CPU) { + // Skip if not on CPU + should_skip = !(status_flags & THREAD_STATUS_ON_CPU); + } else if (unwinder->mode == PROFILING_MODE_GIL) { + // Skip if doesn't have GIL + should_skip = !(status_flags & THREAD_STATUS_HAS_GIL); + } + // PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip + } + + if (should_skip) { + // Advance to next thread and return NULL to skip processing + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); + return NULL; + } + + uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); + uintptr_t base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame); + + frame_info = PyList_New(0); + if (!frame_info) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frame info list"); + goto error; + } + + // In cache mode, copying stack chunks is more expensive than direct memory reads + if (!unwinder->cache_frames) { + if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks"); + goto error; + } + } + + if (unwinder->cache_frames) { + // Use cache to avoid re-reading unchanged parent frames + uintptr_t last_profiled_frame = GET_MEMBER(uintptr_t, ts, + unwinder->debug_offsets.thread_state.last_profiled_frame); + if (collect_frames_with_cache(unwinder, frame_addr, &chunks, frame_info, + gc_frame, last_profiled_frame, tid) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames"); + goto error; + } + // Update last_profiled_frame for next sample + uintptr_t lpf_addr = *current_tstate + unwinder->debug_offsets.thread_state.last_profiled_frame; + if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr, + sizeof(uintptr_t), &frame_addr) < 0) { + PyErr_Clear(); // Non-fatal + } + } else { + // No caching - process entire frame chain with base_frame validation + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, + base_frame_addr, gc_frame, 0, NULL, NULL, NULL, 0) < 0) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); + goto error; + } + } + + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); + + thread_id = PyLong_FromLongLong(tid); + if (thread_id == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); + goto error; + } + + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + result = PyStructSequence_New(state->ThreadInfo_Type); + if (result == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create ThreadInfo"); + goto error; + } + + // Always use status_flags + PyObject *py_status = PyLong_FromLong(status_flags); + if 
(py_status == NULL) { + set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status"); + goto error; + } + + // py_status contains status flags (bitfield) + PyStructSequence_SetItem(result, 0, thread_id); + PyStructSequence_SetItem(result, 1, py_status); // Steals reference + PyStructSequence_SetItem(result, 2, frame_info); // Steals reference + + cleanup_stack_chunks(&chunks); + return result; + +error: + Py_XDECREF(frame_info); + Py_XDECREF(thread_id); + Py_XDECREF(result); + cleanup_stack_chunks(&chunks); + return NULL; +} diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c deleted file mode 100644 index 6544e3a0ce6..00000000000 --- a/Modules/_remote_debugging_module.c +++ /dev/null @@ -1,3602 +0,0 @@ -/****************************************************************************** - * Python Remote Debugging Module - * - * This module provides functionality to debug Python processes remotely by - * reading their memory and reconstructing stack traces and asyncio task states. - ******************************************************************************/ - -#define _GNU_SOURCE - -/* ============================================================================ - * HEADERS AND INCLUDES - * ============================================================================ */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef Py_BUILD_CORE_BUILTIN -# define Py_BUILD_CORE_MODULE 1 -#endif -#include "Python.h" -#include // _Py_DebugOffsets -#include // FRAME_SUSPENDED_YIELD_FROM -#include // FRAME_OWNED_BY_INTERPRETER -#include // struct llist_node -#include // _PyLong_GetZero -#include // Py_TAG_BITS -#include "../Python/remote_debug.h" - -#ifndef HAVE_PROCESS_VM_READV -# define HAVE_PROCESS_VM_READV 0 -#endif - -// Returns thread status using proc_pidinfo, caches thread_id_offset on first use (macOS only) -#if defined(__APPLE__) && TARGET_OS_OSX -#include -#include -#define MAX_NATIVE_THREADS 4096 -#endif - -#ifdef MS_WINDOWS -#include -#include -// ntstatus.h conflicts with windows.h so we have to define the NTSTATUS values we need -#define STATUS_SUCCESS ((NTSTATUS)0x00000000L) -#define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L) -typedef enum _WIN32_THREADSTATE { - WIN32_THREADSTATE_INITIALIZED = 0, // Recognized by the kernel - WIN32_THREADSTATE_READY = 1, // Prepared to run on the next available processor - WIN32_THREADSTATE_RUNNING = 2, // Currently executing - WIN32_THREADSTATE_STANDBY = 3, // About to run, only one thread may be in this state at a time - WIN32_THREADSTATE_TERMINATED = 4, // Finished executing - WIN32_THREADSTATE_WAITING = 5, // Not ready for the processor, when ready, it will be rescheduled - WIN32_THREADSTATE_TRANSITION = 6, // Waiting for resources other than the processor - WIN32_THREADSTATE_UNKNOWN = 7 // Thread state is unknown -} WIN32_THREADSTATE; -#endif - -/* ============================================================================ - * TYPE DEFINITIONS AND STRUCTURES - * ============================================================================ */ - -#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) -#define CLEAR_PTR_TAG(ptr) (((uintptr_t)(ptr) & ~Py_TAG_BITS)) -#define GET_MEMBER_NO_TAG(type, obj, offset) (type)(CLEAR_PTR_TAG(*(type*)((char*)(obj) + (offset)))) - -/* Size macros for opaque buffers */ -#define SIZEOF_BYTES_OBJ sizeof(PyBytesObject) -#define SIZEOF_CODE_OBJ sizeof(PyCodeObject) -#define SIZEOF_GEN_OBJ 
sizeof(PyGenObject) -#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame) -#define SIZEOF_LLIST_NODE sizeof(struct llist_node) -#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t) -#define SIZEOF_PYOBJECT sizeof(PyObject) -#define SIZEOF_SET_OBJ sizeof(PySetObject) -#define SIZEOF_TASK_OBJ 4096 -#define SIZEOF_THREAD_STATE sizeof(PyThreadState) -#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) -#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) -#define SIZEOF_LONG_OBJ sizeof(PyLongObject) -#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state) -#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState) - -// Calculate the minimum buffer size needed to read interpreter state fields -// We need to read code_object_generation and potentially tlbc_generation -#ifndef MAX -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#endif - -#ifdef Py_GIL_DISABLED -#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ - offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ - offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ - offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \ - offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *)) -#else -#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ - offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ - offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \ - offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *)) -#endif -#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) - -#define MAX_TLBC_SIZE 2048 - -// Copied from Modules/_asynciomodule.c because it's not exported - -struct _Py_AsyncioModuleDebugOffsets { - struct _asyncio_task_object { - uint64_t size; - uint64_t task_name; - uint64_t task_awaited_by; - uint64_t task_is_task; - uint64_t task_awaited_by_is_set; - uint64_t task_coro; - uint64_t task_node; - } asyncio_task_object; - struct _asyncio_interpreter_state { - uint64_t size; - uint64_t asyncio_tasks_head; - } asyncio_interpreter_state; - struct _asyncio_thread_state { - uint64_t size; - uint64_t asyncio_running_loop; - uint64_t asyncio_running_task; - uint64_t asyncio_tasks_head; - } asyncio_thread_state; -}; - -/* ============================================================================ - * STRUCTSEQ TYPE DEFINITIONS - * ============================================================================ */ - -// TaskInfo structseq type - replaces 4-tuple (task_id, task_name, coroutine_stack, awaited_by) -static PyStructSequence_Field TaskInfo_fields[] = { - {"task_id", "Task ID (memory address)"}, - {"task_name", "Task name"}, - {"coroutine_stack", "Coroutine call stack"}, - {"awaited_by", "Tasks awaiting this task"}, - {NULL} -}; - -static PyStructSequence_Desc TaskInfo_desc = { - "_remote_debugging.TaskInfo", - "Information about an asyncio task", - TaskInfo_fields, - 4 -}; - -// FrameInfo structseq type - replaces 3-tuple (filename, lineno, funcname) -static PyStructSequence_Field FrameInfo_fields[] = { - {"filename", "Source code filename"}, - {"lineno", "Line number"}, - {"funcname", "Function name"}, - {NULL} -}; - -static PyStructSequence_Desc FrameInfo_desc = { - "_remote_debugging.FrameInfo", - "Information about a frame", - FrameInfo_fields, - 3 -}; - -// CoroInfo structseq type - replaces 2-tuple (call_stack, task_name) -static PyStructSequence_Field 
CoroInfo_fields[] = { - {"call_stack", "Coroutine call stack"}, - {"task_name", "Task name"}, - {NULL} -}; - -static PyStructSequence_Desc CoroInfo_desc = { - "_remote_debugging.CoroInfo", - "Information about a coroutine", - CoroInfo_fields, - 2 -}; - -// ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info) -static PyStructSequence_Field ThreadInfo_fields[] = { - {"thread_id", "Thread ID"}, - {"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"}, - {"frame_info", "Frame information"}, - {NULL} -}; - -static PyStructSequence_Desc ThreadInfo_desc = { - "_remote_debugging.ThreadInfo", - "Information about a thread", - ThreadInfo_fields, - 3 -}; - -// InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list) -static PyStructSequence_Field InterpreterInfo_fields[] = { - {"interpreter_id", "Interpreter ID"}, - {"threads", "List of threads in this interpreter"}, - {NULL} -}; - -static PyStructSequence_Desc InterpreterInfo_desc = { - "_remote_debugging.InterpreterInfo", - "Information about an interpreter", - InterpreterInfo_fields, - 2 -}; - -// AwaitedInfo structseq type - replaces 2-tuple (tid, awaited_by_list) -static PyStructSequence_Field AwaitedInfo_fields[] = { - {"thread_id", "Thread ID"}, - {"awaited_by", "List of tasks awaited by this thread"}, - {NULL} -}; - -static PyStructSequence_Desc AwaitedInfo_desc = { - "_remote_debugging.AwaitedInfo", - "Information about what a thread is awaiting", - AwaitedInfo_fields, - 2 -}; - -typedef struct { - PyObject *func_name; - PyObject *file_name; - int first_lineno; - PyObject *linetable; // bytes - uintptr_t addr_code_adaptive; -} CachedCodeMetadata; - -typedef struct { - /* Types */ - PyTypeObject *RemoteDebugging_Type; - PyTypeObject *TaskInfo_Type; - PyTypeObject *FrameInfo_Type; - PyTypeObject *CoroInfo_Type; - PyTypeObject *ThreadInfo_Type; - PyTypeObject *InterpreterInfo_Type; - PyTypeObject *AwaitedInfo_Type; -} RemoteDebuggingState; - -enum _ThreadState { - THREAD_STATE_RUNNING, - THREAD_STATE_IDLE, - THREAD_STATE_GIL_WAIT, - THREAD_STATE_UNKNOWN -}; - -enum _ProfilingMode { - PROFILING_MODE_WALL = 0, - PROFILING_MODE_CPU = 1, - PROFILING_MODE_GIL = 2, - PROFILING_MODE_ALL = 3 // Combines GIL + CPU checks -}; - -// Thread status flags (can be combined) -#define THREAD_STATUS_HAS_GIL (1 << 0) // Thread has the GIL -#define THREAD_STATUS_ON_CPU (1 << 1) // Thread is running on CPU -#define THREAD_STATUS_UNKNOWN (1 << 2) // Status could not be determined -#define THREAD_STATUS_GIL_REQUESTED (1 << 3) // Thread is waiting for the GIL - -typedef struct { - PyObject_HEAD - proc_handle_t handle; - uintptr_t runtime_start_address; - struct _Py_DebugOffsets debug_offsets; - int async_debug_offsets_available; - struct _Py_AsyncioModuleDebugOffsets async_debug_offsets; - uintptr_t interpreter_addr; - uintptr_t tstate_addr; - uint64_t code_object_generation; - _Py_hashtable_t *code_object_cache; - int debug; - int only_active_thread; - int mode; // Use enum _ProfilingMode values - int skip_non_matching_threads; // New option to skip threads that don't match mode - int native; - int gc; - RemoteDebuggingState *cached_state; // Cached module state -#ifdef Py_GIL_DISABLED - // TLBC cache invalidation tracking - uint32_t tlbc_generation; // Track TLBC index pool changes - _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address -#endif -#ifdef __APPLE__ - uint64_t thread_id_offset; -#endif -#ifdef MS_WINDOWS - PVOID win_process_buffer; - ULONG win_process_buffer_size; -#endif 
-} RemoteUnwinderObject; - -#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op)) - -typedef struct -{ - int lineno; - int end_lineno; - int column; - int end_column; -} LocationInfo; - -typedef struct { - uintptr_t remote_addr; - size_t size; - void *local_copy; -} StackChunkInfo; - -typedef struct { - StackChunkInfo *chunks; - size_t count; -} StackChunkList; - -#include "clinic/_remote_debugging_module.c.h" - -/*[clinic input] -module _remote_debugging -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=5f507d5b2e76a7f7]*/ - - -/* ============================================================================ - * FORWARD DECLARATIONS - * ============================================================================ */ - -static inline int -is_frame_valid( - RemoteUnwinderObject *unwinder, - uintptr_t frame_addr, - uintptr_t code_object_addr -); - -typedef int (*thread_processor_func)( - RemoteUnwinderObject *unwinder, - uintptr_t thread_state_addr, - unsigned long tid, - void *context -); - -typedef int (*set_entry_processor_func)( - RemoteUnwinderObject *unwinder, - uintptr_t key_addr, - void *context -); - - -static int -parse_task( - RemoteUnwinderObject *unwinder, - uintptr_t task_address, - PyObject *render_to -); - -static int -parse_coro_chain( - RemoteUnwinderObject *unwinder, - uintptr_t coro_address, - PyObject *render_to -); - -/* Forward declarations for task parsing functions */ -static int parse_frame_object( - RemoteUnwinderObject *unwinder, - PyObject** result, - uintptr_t address, - uintptr_t* address_of_code_object, - uintptr_t* previous_frame -); - -static int -parse_async_frame_chain( - RemoteUnwinderObject *unwinder, - PyObject *calls, - uintptr_t address_of_thread, - uintptr_t running_task_code_obj -); - -static int read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr); -static int read_Py_ssize_t(RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t *size); - -static int process_task_and_waiters(RemoteUnwinderObject *unwinder, uintptr_t task_addr, PyObject *result); -static int process_task_awaited_by(RemoteUnwinderObject *unwinder, uintptr_t task_address, set_entry_processor_func processor, void *context); -static int find_running_task_in_thread(RemoteUnwinderObject *unwinder, uintptr_t thread_state_addr, uintptr_t *running_task_addr); -static int get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr); -static int append_awaited_by(RemoteUnwinderObject *unwinder, unsigned long tid, uintptr_t head_addr, PyObject *result); - -/* ============================================================================ - * UTILITY FUNCTIONS AND HELPERS - * ============================================================================ */ - -#define set_exception_cause(unwinder, exc_type, message) \ - if (unwinder->debug) { \ - _set_debug_exception_cause(exc_type, message); \ - } - -static void -cached_code_metadata_destroy(void *ptr) -{ - CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr; - Py_DECREF(meta->func_name); - Py_DECREF(meta->file_name); - Py_DECREF(meta->linetable); - PyMem_RawFree(meta); -} - -static inline RemoteDebuggingState * -RemoteDebugging_GetState(PyObject *module) -{ - void *state = _PyModule_GetState(module); - assert(state != NULL); - return (RemoteDebuggingState *)state; -} - -static inline RemoteDebuggingState * -RemoteDebugging_GetStateFromType(PyTypeObject *type) -{ - PyObject *module = PyType_GetModule(type); - assert(module 
!= NULL); - return RemoteDebugging_GetState(module); -} - -static inline RemoteDebuggingState * -RemoteDebugging_GetStateFromObject(PyObject *obj) -{ - RemoteUnwinderObject *unwinder = (RemoteUnwinderObject *)obj; - if (unwinder->cached_state == NULL) { - unwinder->cached_state = RemoteDebugging_GetStateFromType(Py_TYPE(obj)); - } - return unwinder->cached_state; -} - -static inline int -RemoteDebugging_InitState(RemoteDebuggingState *st) -{ - return 0; -} - -static int -is_prerelease_version(uint64_t version) -{ - return (version & 0xF0) != 0xF0; -} - -static inline int -validate_debug_offsets(struct _Py_DebugOffsets *debug_offsets) -{ - if (memcmp(debug_offsets->cookie, _Py_Debug_Cookie, sizeof(debug_offsets->cookie)) != 0) { - // The remote is probably running a Python version predating debug offsets. - PyErr_SetString( - PyExc_RuntimeError, - "Can't determine the Python version of the remote process"); - return -1; - } - - // Assume debug offsets could change from one pre-release version to another, - // or one minor version to another, but are stable across patch versions. - if (is_prerelease_version(Py_Version) && Py_Version != debug_offsets->version) { - PyErr_SetString( - PyExc_RuntimeError, - "Can't attach from a pre-release Python interpreter" - " to a process running a different Python version"); - return -1; - } - - if (is_prerelease_version(debug_offsets->version) && Py_Version != debug_offsets->version) { - PyErr_SetString( - PyExc_RuntimeError, - "Can't attach to a pre-release Python interpreter" - " from a process running a different Python version"); - return -1; - } - - unsigned int remote_major = (debug_offsets->version >> 24) & 0xFF; - unsigned int remote_minor = (debug_offsets->version >> 16) & 0xFF; - - if (PY_MAJOR_VERSION != remote_major || PY_MINOR_VERSION != remote_minor) { - PyErr_Format( - PyExc_RuntimeError, - "Can't attach from a Python %d.%d process to a Python %d.%d process", - PY_MAJOR_VERSION, PY_MINOR_VERSION, remote_major, remote_minor); - return -1; - } - - // The debug offsets differ between free threaded and non-free threaded builds. 
- if (_Py_Debug_Free_Threaded && !debug_offsets->free_threaded) { - PyErr_SetString( - PyExc_RuntimeError, - "Cannot attach from a free-threaded Python process" - " to a process running a non-free-threaded version"); - return -1; - } - - if (!_Py_Debug_Free_Threaded && debug_offsets->free_threaded) { - PyErr_SetString( - PyExc_RuntimeError, - "Cannot attach to a free-threaded Python process" - " from a process running a non-free-threaded version"); - return -1; - } - - return 0; -} - -// Generic function to iterate through all threads -static int -iterate_threads( - RemoteUnwinderObject *unwinder, - thread_processor_func processor, - void *context -) { - uintptr_t thread_state_addr; - unsigned long tid = 0; - - if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - unwinder->interpreter_addr + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main, - sizeof(void*), - &thread_state_addr)) - { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state"); - return -1; - } - - while (thread_state_addr != 0) { - if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.native_thread_id, - sizeof(tid), - &tid)) - { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread ID"); - return -1; - } - - // Call the processor function for this thread - if (processor(unwinder, thread_state_addr, tid, context) < 0) { - return -1; - } - - // Move to next thread - if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr)) - { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next thread state"); - return -1; - } - } - - return 0; -} - -// Generic function to iterate through set entries -static int -iterate_set_entries( - RemoteUnwinderObject *unwinder, - uintptr_t set_addr, - set_entry_processor_func processor, - void *context -) { - char set_object[SIZEOF_SET_OBJ]; - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, set_addr, - SIZEOF_SET_OBJ, set_object) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set object"); - return -1; - } - - Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); - Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; - uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); - - Py_ssize_t i = 0; - Py_ssize_t els = 0; - while (i < set_len && els < num_els) { - uintptr_t key_addr; - if (read_py_ptr(unwinder, table_ptr, &key_addr) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry key"); - return -1; - } - - if ((void*)key_addr != NULL) { - Py_ssize_t ref_cnt; - if (read_Py_ssize_t(unwinder, table_ptr, &ref_cnt) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read set entry ref count"); - return -1; - } - - if (ref_cnt) { - // Process this valid set entry - if (processor(unwinder, key_addr, context) < 0) { - return -1; - } - els++; - } - } - table_ptr += sizeof(void*) * 2; - i++; - } - - return 0; -} - -// Processor function for task waiters -static int -process_waiter_task( - RemoteUnwinderObject *unwinder, - uintptr_t key_addr, - void *context -) { - PyObject *result = (PyObject *)context; - return process_task_and_waiters(unwinder, key_addr, result); -} - -// 
Processor function for parsing tasks in sets -static int -process_task_parser( - RemoteUnwinderObject *unwinder, - uintptr_t key_addr, - void *context -) { - PyObject *awaited_by = (PyObject *)context; - return parse_task(unwinder, key_addr, awaited_by); -} - -/* ============================================================================ - * MEMORY READING FUNCTIONS - * ============================================================================ */ - -#define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \ -static inline int \ -read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \ -{ \ - int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \ - if (res < 0) { \ - set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \ - return -1; \ - } \ - return 0; \ -} - -DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory") -DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory") -DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory") - -static int -read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr) -{ - if (read_ptr(unwinder, address, ptr_addr)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer"); - return -1; - } - *ptr_addr &= ~Py_TAG_BITS; - return 0; -} - -/* ============================================================================ - * PYTHON OBJECT READING FUNCTIONS - * ============================================================================ */ - -static PyObject * -read_py_str( - RemoteUnwinderObject *unwinder, - uintptr_t address, - Py_ssize_t max_len -) { - PyObject *result = NULL; - char *buf = NULL; - - // Read the entire PyUnicodeObject at once - char unicode_obj[SIZEOF_UNICODE_OBJ]; - int res = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address, - SIZEOF_UNICODE_OBJ, - unicode_obj - ); - if (res < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject"); - goto err; - } - - Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); - if (len < 0 || len > max_len) { - PyErr_Format(PyExc_RuntimeError, - "Invalid string length (%zd) at 0x%lx", len, address); - set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid string length in remote Unicode object"); - return NULL; - } - - buf = (char *)PyMem_RawMalloc(len+1); - if (buf == NULL) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for string reading"); - return NULL; - } - - size_t offset = (size_t)unwinder->debug_offsets.unicode_object.asciiobject_size; - res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); - if (res < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory"); - goto err; - } - buf[len] = '\0'; - - result = PyUnicode_FromStringAndSize(buf, len); - if (result == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyUnicode from remote string data"); - goto err; - } - - PyMem_RawFree(buf); - assert(result != NULL); - return result; - -err: - if (buf != NULL) { - PyMem_RawFree(buf); - } - return NULL; -} - -static PyObject * -read_py_bytes( - RemoteUnwinderObject *unwinder, - uintptr_t address, - Py_ssize_t max_len -) { - PyObject *result = NULL; - char *buf = NULL; - - // Read the entire PyBytesObject at once - char 
bytes_obj[SIZEOF_BYTES_OBJ]; - int res = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address, - SIZEOF_BYTES_OBJ, - bytes_obj - ); - if (res < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject"); - goto err; - } - - Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); - if (len < 0 || len > max_len) { - PyErr_Format(PyExc_RuntimeError, - "Invalid bytes length (%zd) at 0x%lx", len, address); - set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object"); - return NULL; - } - - buf = (char *)PyMem_RawMalloc(len+1); - if (buf == NULL) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading"); - return NULL; - } - - size_t offset = (size_t)unwinder->debug_offsets.bytes_object.ob_sval; - res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); - if (res < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory"); - goto err; - } - buf[len] = '\0'; - - result = PyBytes_FromStringAndSize(buf, len); - if (result == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data"); - goto err; - } - - PyMem_RawFree(buf); - assert(result != NULL); - return result; - -err: - if (buf != NULL) { - PyMem_RawFree(buf); - } - return NULL; -} - -static long -read_py_long( - RemoteUnwinderObject *unwinder, - uintptr_t address -) -{ - unsigned int shift = PYLONG_BITS_IN_DIGIT; - - // Read the entire PyLongObject at once - char long_obj[SIZEOF_LONG_OBJ]; - int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address, - (size_t)unwinder->debug_offsets.long_object.size, - long_obj); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject"); - return -1; - } - - uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); - int negative = (lv_tag & 3) == 2; - Py_ssize_t size = lv_tag >> 3; - - if (size == 0) { - return 0; - } - - // If the long object has inline digits, use them directly - digit *digits; - if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { - // For small integers, digits are inline in the long_value.ob_digit array - digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); - if (!digits) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong"); - return -1; - } - memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); - } else { - // For larger integers, we need to read the digits separately - digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); - if (!digits) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong"); - return -1; - } - - bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address + (uintptr_t)unwinder->debug_offsets.long_object.ob_digit, - sizeof(digit) * size, - digits - ); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory"); - goto error; - } - } - - long long value = 0; - - // In theory this can overflow, but because of llvm/llvm-project#16778 - // we can't use __builtin_mul_overflow because it fails to link with - // __muloti4 on aarch64. 
In practice this is fine because all we're - // testing here are task numbers that would fit in a single byte. - for (Py_ssize_t i = 0; i < size; ++i) { - long long factor = digits[i] * (1UL << (Py_ssize_t)(shift * i)); - value += factor; - } - PyMem_RawFree(digits); - if (negative) { - value *= -1; - } - return (long)value; -error: - PyMem_RawFree(digits); - return -1; -} - -/* ============================================================================ - * ASYNCIO DEBUG FUNCTIONS - * ============================================================================ */ - -// Get the PyAsyncioDebug section address for any platform -static uintptr_t -_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) -{ - uintptr_t address; - -#ifdef MS_WINDOWS - // On Windows, search for asyncio debug in executable or DLL - address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); - if (address == 0) { - // Error out: 'python' substring covers both executable and DLL - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); - _PyErr_ChainExceptions1(exc); - } -#elif defined(__linux__) && HAVE_PROCESS_VM_READV - // On Linux, search for asyncio debug in executable or DLL - address = search_linux_map_for_section(handle, "AsyncioDebug", "python"); - if (address == 0) { - // Error out: 'python' substring covers both executable and DLL - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); - _PyErr_ChainExceptions1(exc); - } -#elif defined(__APPLE__) && TARGET_OS_OSX - // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "AsyncioDebug", "libpython"); - if (address == 0) { - PyErr_Clear(); - address = search_map_for_section(handle, "AsyncioDebug", "python"); - } - if (address == 0) { - // Error out: 'python' substring covers both executable and DLL - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); - _PyErr_ChainExceptions1(exc); - } -#else - Py_UNREACHABLE(); -#endif - - return address; -} - -static int -read_async_debug( - RemoteUnwinderObject *unwinder -) { - uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); - if (!async_debug_addr) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to get AsyncioDebug address"); - return -1; - } - - size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); - int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); - if (result < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read AsyncioDebug offsets"); - } - return result; -} - -/* ============================================================================ - * ASYNCIO TASK PARSING FUNCTIONS - * ============================================================================ */ - -static PyObject * -parse_task_name( - RemoteUnwinderObject *unwinder, - uintptr_t task_address -) { - // Read the entire TaskObj at once - char task_obj[SIZEOF_TASK_OBJ]; - int err = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - task_address, - (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, - task_obj); - if (err < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); - return NULL; - } - - uintptr_t task_name_addr 
= GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); - - // The task name can be a long or a string so we need to check the type - char task_name_obj[SIZEOF_PYOBJECT]; - err = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - task_name_addr, - SIZEOF_PYOBJECT, - task_name_obj); - if (err < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name object"); - return NULL; - } - - // Now read the type object to get the flags - char type_obj[SIZEOF_TYPE_OBJ]; - err = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type), - SIZEOF_TYPE_OBJ, - type_obj); - if (err < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task name type object"); - return NULL; - } - - if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { - long res = read_py_long(unwinder, task_name_addr); - if (res == -1) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Task name PyLong parsing failed"); - return NULL; - } - return PyUnicode_FromFormat("Task-%d", res); - } - - if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { - PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Task name object is neither long nor unicode"); - return NULL; - } - - return read_py_str( - unwinder, - task_name_addr, - 255 - ); -} - -static int parse_task_awaited_by( - RemoteUnwinderObject *unwinder, - uintptr_t task_address, - PyObject *awaited_by -) { - return process_task_awaited_by(unwinder, task_address, process_task_parser, awaited_by); -} - -static int -handle_yield_from_frame( - RemoteUnwinderObject *unwinder, - uintptr_t gi_iframe_addr, - uintptr_t gen_type_addr, - PyObject *render_to -) { - // Read the entire interpreter frame at once - char iframe[SIZEOF_INTERP_FRAME]; - int err = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - gi_iframe_addr, - SIZEOF_INTERP_FRAME, - iframe); - if (err < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame in yield_from handler"); - return -1; - } - - if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) { - PyErr_SetString( - PyExc_RuntimeError, - "generator doesn't own its frame \\_o_/"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Frame ownership mismatch in yield_from"); - return -1; - } - - uintptr_t stackpointer_addr = GET_MEMBER_NO_TAG(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); - - if ((void*)stackpointer_addr != NULL) { - uintptr_t gi_await_addr; - err = read_py_ptr( - unwinder, - stackpointer_addr - sizeof(void*), - &gi_await_addr); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await address"); - return -1; - } - - if ((void*)gi_await_addr != NULL) { - uintptr_t gi_await_addr_type_addr; - err = read_ptr( - unwinder, - gi_await_addr + (uintptr_t)unwinder->debug_offsets.pyobject.ob_type, - &gi_await_addr_type_addr); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read gi_await type address"); - return -1; - } - - if (gen_type_addr == gi_await_addr_type_addr) { - /* This needs an explanation. We always start with parsing - native coroutine / generator frames. 
Ultimately they - are awaiting on something. That something can be - a native coroutine frame or... an iterator. - If it's the latter -- we can't continue building - our chain. So the condition to bail out of this is - to do that when the type of the current coroutine - doesn't match the type of whatever it points to - in its cr_await. - */ - err = parse_coro_chain(unwinder, gi_await_addr, render_to); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain in yield_from"); - return -1; - } - } - } - } - - return 0; -} - -static int -parse_coro_chain( - RemoteUnwinderObject *unwinder, - uintptr_t coro_address, - PyObject *render_to -) { - assert((void*)coro_address != NULL); - - // Read the entire generator object at once - char gen_object[SIZEOF_GEN_OBJ]; - int err = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - coro_address, - SIZEOF_GEN_OBJ, - gen_object); - if (err < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read generator object in coro chain"); - return -1; - } - - int8_t frame_state = GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state); - if (frame_state == FRAME_CLEARED) { - return 0; - } - - uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); - - PyObject* name = NULL; - - // Parse the previous frame using the gi_iframe from local copy - uintptr_t prev_frame; - uintptr_t gi_iframe_addr = coro_address + (uintptr_t)unwinder->debug_offsets.gen_object.gi_iframe; - uintptr_t address_of_code_object = 0; - if (parse_frame_object(unwinder, &name, gi_iframe_addr, &address_of_code_object, &prev_frame) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in coro chain"); - return -1; - } - - if (!name) { - return 0; - } - - if (PyList_Append(render_to, name)) { - Py_DECREF(name); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to coro chain"); - return -1; - } - Py_DECREF(name); - - if (frame_state == FRAME_SUSPENDED_YIELD_FROM) { - return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); - } - - return 0; -} - -static PyObject* -create_task_result( - RemoteUnwinderObject *unwinder, - uintptr_t task_address -) { - PyObject* result = NULL; - PyObject *call_stack = NULL; - PyObject *tn = NULL; - char task_obj[SIZEOF_TASK_OBJ]; - uintptr_t coro_addr; - - // Create call_stack first since it's the first tuple element - call_stack = PyList_New(0); - if (call_stack == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create call stack list"); - goto error; - } - - // Create task name/address for second tuple element - tn = PyLong_FromUnsignedLongLong(task_address); - if (tn == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name/address"); - goto error; - } - - // Parse coroutine chain - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, - (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, - task_obj) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object for coro chain"); - goto error; - } - - coro_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); - - if ((void*)coro_addr != NULL) { - if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse coroutine chain"); - goto error; - } - - if 
(PyList_Reverse(call_stack)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reverse call stack"); - goto error; - } - } - - // Create final CoroInfo result - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - result = PyStructSequence_New(state->CoroInfo_Type); - if (result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create CoroInfo"); - goto error; - } - - // PyStructSequence_SetItem steals references, so we don't need to DECREF on success - PyStructSequence_SetItem(result, 0, call_stack); // This steals the reference - PyStructSequence_SetItem(result, 1, tn); // This steals the reference - - return result; - -error: - Py_XDECREF(result); - Py_XDECREF(call_stack); - Py_XDECREF(tn); - return NULL; -} - -static int -parse_task( - RemoteUnwinderObject *unwinder, - uintptr_t task_address, - PyObject *render_to -) { - char is_task; - PyObject* result = NULL; - int err; - - err = read_char( - unwinder, - task_address + (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_is_task, - &is_task); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read is_task flag"); - goto error; - } - - if (is_task) { - result = create_task_result(unwinder, task_address); - if (!result) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task result"); - goto error; - } - } else { - // Create an empty CoroInfo for non-task objects - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - result = PyStructSequence_New(state->CoroInfo_Type); - if (result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty CoroInfo"); - goto error; - } - PyObject *empty_list = PyList_New(0); - if (empty_list == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create empty list"); - goto error; - } - PyObject *task_name = PyLong_FromUnsignedLongLong(task_address); - if (task_name == NULL) { - Py_DECREF(empty_list); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task name"); - goto error; - } - PyStructSequence_SetItem(result, 0, empty_list); // This steals the reference - PyStructSequence_SetItem(result, 1, task_name); // This steals the reference - } - if (PyList_Append(render_to, result)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append task result to render list"); - goto error; - } - - Py_DECREF(result); - return 0; - -error: - Py_XDECREF(result); - return -1; -} - -static int -process_single_task_node( - RemoteUnwinderObject *unwinder, - uintptr_t task_addr, - PyObject **task_info, - PyObject *result -) { - PyObject *tn = NULL; - PyObject *current_awaited_by = NULL; - PyObject *task_id = NULL; - PyObject *result_item = NULL; - PyObject *coroutine_stack = NULL; - - tn = parse_task_name(unwinder, task_addr); - if (tn == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task name in single task node"); - goto error; - } - - current_awaited_by = PyList_New(0); - if (current_awaited_by == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by list in single task node"); - goto error; - } - - // Extract the coroutine stack for this task - coroutine_stack = PyList_New(0); - if (coroutine_stack == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create coroutine stack list in single task node"); - goto error; - } - - if (parse_task(unwinder, task_addr, coroutine_stack) < 0) { - 
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse task coroutine stack in single task node"); - goto error; - } - - task_id = PyLong_FromUnsignedLongLong(task_addr); - if (task_id == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create task ID in single task node"); - goto error; - } - - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - result_item = PyStructSequence_New(state->TaskInfo_Type); - if (result_item == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create TaskInfo in single task node"); - goto error; - } - - PyStructSequence_SetItem(result_item, 0, task_id); // steals ref - PyStructSequence_SetItem(result_item, 1, tn); // steals ref - PyStructSequence_SetItem(result_item, 2, coroutine_stack); // steals ref - PyStructSequence_SetItem(result_item, 3, current_awaited_by); // steals ref - - // References transferred to tuple - task_id = NULL; - tn = NULL; - coroutine_stack = NULL; - current_awaited_by = NULL; - - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append result item in single task node"); - return -1; - } - if (task_info != NULL) { - *task_info = result_item; - } - Py_DECREF(result_item); - - // Get back current_awaited_by reference for parse_task_awaited_by - current_awaited_by = PyStructSequence_GetItem(result_item, 3); - if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse awaited_by in single task node"); - // No cleanup needed here since all references were transferred to result_item - // and result_item was already added to result list and decreffed - return -1; - } - - return 0; - -error: - Py_XDECREF(tn); - Py_XDECREF(current_awaited_by); - Py_XDECREF(task_id); - Py_XDECREF(result_item); - Py_XDECREF(coroutine_stack); - return -1; -} - -// Thread processor for get_all_awaited_by -static int -process_thread_for_awaited_by( - RemoteUnwinderObject *unwinder, - uintptr_t thread_state_addr, - unsigned long tid, - void *context -) { - PyObject *result = (PyObject *)context; - uintptr_t head_addr = thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; - return append_awaited_by(unwinder, tid, head_addr, result); -} - -// Generic function to process task awaited_by -static int -process_task_awaited_by( - RemoteUnwinderObject *unwinder, - uintptr_t task_address, - set_entry_processor_func processor, - void *context -) { - // Read the entire TaskObj at once - char task_obj[SIZEOF_TASK_OBJ]; - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, - (size_t)unwinder->async_debug_offsets.asyncio_task_object.size, - task_obj) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task object"); - return -1; - } - - uintptr_t task_ab_addr = GET_MEMBER_NO_TAG(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); - if ((void*)task_ab_addr == NULL) { - return 0; // No tasks waiting for this one - } - - char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); - - if (awaited_by_is_a_set) { - return iterate_set_entries(unwinder, task_ab_addr, processor, context); - } else { - // Single task waiting - return processor(unwinder, task_ab_addr, context); - } -} - -static int -process_running_task_chain( - 
RemoteUnwinderObject *unwinder, - uintptr_t running_task_addr, - uintptr_t thread_state_addr, - PyObject *result -) { - uintptr_t running_task_code_obj = 0; - if(get_task_code_object(unwinder, running_task_addr, &running_task_code_obj) < 0) { - return -1; - } - - // First, add this task to the result - PyObject *task_info = NULL; - if (process_single_task_node(unwinder, running_task_addr, &task_info, result) < 0) { - return -1; - } - - // Get the chain from the current frame to this task - PyObject *coro_chain = PyStructSequence_GET_ITEM(task_info, 2); - assert(coro_chain != NULL); - if (PyList_GET_SIZE(coro_chain) != 1) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Coro chain is not a single item"); - return -1; - } - PyObject *coro_info = PyList_GET_ITEM(coro_chain, 0); - assert(coro_info != NULL); - PyObject *frame_chain = PyStructSequence_GET_ITEM(coro_info, 0); - assert(frame_chain != NULL); - - // Clear the coro_chain - if (PyList_Clear(frame_chain) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to clear coroutine chain"); - return -1; - } - - // Add the chain from the current frame to this task - if (parse_async_frame_chain(unwinder, frame_chain, thread_state_addr, running_task_code_obj) < 0) { - return -1; - } - - // Now find all tasks that are waiting for this task and process them - if (process_task_awaited_by(unwinder, running_task_addr, process_waiter_task, result) < 0) { - return -1; - } - - return 0; -} - -// Thread processor for get_async_stack_trace -static int -process_thread_for_async_stack_trace( - RemoteUnwinderObject *unwinder, - uintptr_t thread_state_addr, - unsigned long tid, - void *context -) { - PyObject *result = (PyObject *)context; - - // Find running task in this thread - uintptr_t running_task_addr; - if (find_running_task_in_thread(unwinder, thread_state_addr, &running_task_addr) < 0) { - return 0; - } - - // If we found a running task, process it and its waiters - if ((void*)running_task_addr != NULL) { - PyObject *task_list = PyList_New(0); - if (task_list == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create task list for thread"); - return -1; - } - - if (process_running_task_chain(unwinder, running_task_addr, thread_state_addr, task_list) < 0) { - Py_DECREF(task_list); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process running task chain"); - return -1; - } - - // Create AwaitedInfo structure for this thread - PyObject *tid_py = PyLong_FromUnsignedLong(tid); - if (tid_py == NULL) { - Py_DECREF(task_list); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); - return -1; - } - - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - PyObject *awaited_info = PyStructSequence_New(state->AwaitedInfo_Type); - if (awaited_info == NULL) { - Py_DECREF(tid_py); - Py_DECREF(task_list); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); - return -1; - } - - PyStructSequence_SetItem(awaited_info, 0, tid_py); // steals ref - PyStructSequence_SetItem(awaited_info, 1, task_list); // steals ref - - if (PyList_Append(result, awaited_info)) { - Py_DECREF(awaited_info); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append AwaitedInfo to result"); - return -1; - } - Py_DECREF(awaited_info); - } - - return 0; -} - -static int -process_task_and_waiters( - RemoteUnwinderObject *unwinder, - uintptr_t task_addr, - PyObject *result -) { - // First, add this task to the result - if 
(process_single_task_node(unwinder, task_addr, NULL, result) < 0) { - return -1; - } - - // Now find all tasks that are waiting for this task and process them - return process_task_awaited_by(unwinder, task_addr, process_waiter_task, result); -} - -static int -find_running_task_in_thread( - RemoteUnwinderObject *unwinder, - uintptr_t thread_state_addr, - uintptr_t *running_task_addr -) { - *running_task_addr = (uintptr_t)NULL; - - uintptr_t address_of_running_loop; - int bytes_read = read_py_ptr( - unwinder, - thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, - &address_of_running_loop); - if (bytes_read == -1) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running loop address"); - return -1; - } - - // no asyncio loop is now running - if ((void*)address_of_running_loop == NULL) { - return 0; - } - - int err = read_ptr( - unwinder, - thread_state_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, - running_task_addr); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task address"); - return -1; - } - - return 0; -} - -static int -get_task_code_object(RemoteUnwinderObject *unwinder, uintptr_t task_addr, uintptr_t *code_obj_addr) { - uintptr_t running_coro_addr = 0; - - if(read_py_ptr( - unwinder, - task_addr + (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_coro, - &running_coro_addr) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro read failed"); - return -1; - } - - if (running_coro_addr == 0) { - PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task coro address is NULL"); - return -1; - } - - // note: genobject's gi_iframe is an embedded struct so the address to - // the offset leads directly to its first field: f_executable - if (read_py_ptr( - unwinder, - running_coro_addr + (uintptr_t)unwinder->debug_offsets.gen_object.gi_iframe, code_obj_addr) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read running task code object"); - return -1; - } - - if (*code_obj_addr == 0) { - PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Running task code object address is NULL"); - return -1; - } - - return 0; -} - -/* ============================================================================ - * TLBC CACHING FUNCTIONS - * ============================================================================ */ - -#ifdef Py_GIL_DISABLED - -typedef struct { - void *tlbc_array; // Local copy of the TLBC array - Py_ssize_t tlbc_array_size; // Size of the TLBC array - uint32_t generation; // Generation when this was cached -} TLBCCacheEntry; - -static void -tlbc_cache_entry_destroy(void *ptr) -{ - TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr; - if (entry->tlbc_array) { - PyMem_RawFree(entry->tlbc_array); - } - PyMem_RawFree(entry); -} - -static TLBCCacheEntry * -get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation) -{ - void *key = (void *)code_addr; - TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key); - - if (entry && entry->generation != current_generation) { - // Entry is stale, remove it by setting to NULL - _Py_hashtable_set(self->tlbc_cache, key, NULL); - entry = NULL; - } - - return entry; -} - -static int -cache_tlbc_array(RemoteUnwinderObject *unwinder, uintptr_t 
code_addr, uintptr_t tlbc_array_addr, uint32_t generation) -{ - uintptr_t tlbc_array_ptr; - void *tlbc_array = NULL; - TLBCCacheEntry *entry = NULL; - - // Read the TLBC array pointer - if (read_ptr(unwinder, tlbc_array_addr, &tlbc_array_ptr) != 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array pointer"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array pointer"); - return 0; // Read error - } - - // Validate TLBC array pointer - if (tlbc_array_ptr == 0) { - PyErr_SetString(PyExc_RuntimeError, "TLBC array pointer is NULL"); - return 0; // No TLBC array - } - - // Read the TLBC array size - Py_ssize_t tlbc_size; - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0) { - PyErr_SetString(PyExc_RuntimeError, "Failed to read TLBC array size"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array size"); - return 0; // Read error - } - - // Validate TLBC array size - if (tlbc_size <= 0) { - PyErr_SetString(PyExc_RuntimeError, "Invalid TLBC array size"); - return 0; // Invalid size - } - - if (tlbc_size > MAX_TLBC_SIZE) { - PyErr_SetString(PyExc_RuntimeError, "TLBC array size exceeds maximum limit"); - return 0; // Invalid size - } - - // Allocate and read the entire TLBC array - size_t array_data_size = tlbc_size * sizeof(void*); - tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); - if (!tlbc_array) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC array"); - return 0; // Memory error - } - - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { - PyMem_RawFree(tlbc_array); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read TLBC array data"); - return 0; // Read error - } - - // Create cache entry - entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); - if (!entry) { - PyErr_NoMemory(); - PyMem_RawFree(tlbc_array); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate TLBC cache entry"); - return 0; // Memory error - } - - entry->tlbc_array = tlbc_array; - entry->tlbc_array_size = tlbc_size; - entry->generation = generation; - - // Store in cache - void *key = (void *)code_addr; - if (_Py_hashtable_set(unwinder->tlbc_cache, key, entry) < 0) { - tlbc_cache_entry_destroy(entry); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to store TLBC entry in cache"); - return 0; // Cache error - } - - return 1; // Success -} - - - -#endif - -/* ============================================================================ - * LINE TABLE PARSING FUNCTIONS - * ============================================================================ */ - -static int -scan_varint(const uint8_t **ptr) -{ - unsigned int read = **ptr; - *ptr = *ptr + 1; - unsigned int val = read & 63; - unsigned int shift = 0; - while (read & 64) { - read = **ptr; - *ptr = *ptr + 1; - shift += 6; - val |= (read & 63) << shift; - } - return val; -} - -static int -scan_signed_varint(const uint8_t **ptr) -{ - unsigned int uval = scan_varint(ptr); - if (uval & 1) { - return -(int)(uval >> 1); - } - else { - return uval >> 1; - } -} - -static bool -parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) -{ - const uint8_t* ptr = (const uint8_t*)(linetable); - uintptr_t addr = 0; - info->lineno = firstlineno; - - while (*ptr != '\0') { - // See InternalDocs/code_objects.md for where these 
magic numbers are from - // and for the decoding algorithm. - uint8_t first_byte = *(ptr++); - uint8_t code = (first_byte >> 3) & 15; - size_t length = (first_byte & 7) + 1; - uintptr_t end_addr = addr + length; - switch (code) { - case PY_CODE_LOCATION_INFO_NONE: { - break; - } - case PY_CODE_LOCATION_INFO_LONG: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; - info->end_lineno = info->lineno + scan_varint(&ptr); - info->column = scan_varint(&ptr) - 1; - info->end_column = scan_varint(&ptr) - 1; - break; - } - case PY_CODE_LOCATION_INFO_NO_COLUMNS: { - int line_delta = scan_signed_varint(&ptr); - info->lineno += line_delta; - info->column = info->end_column = -1; - break; - } - case PY_CODE_LOCATION_INFO_ONE_LINE0: - case PY_CODE_LOCATION_INFO_ONE_LINE1: - case PY_CODE_LOCATION_INFO_ONE_LINE2: { - int line_delta = code - 10; - info->lineno += line_delta; - info->end_lineno = info->lineno; - info->column = *(ptr++); - info->end_column = *(ptr++); - break; - } - default: { - uint8_t second_byte = *(ptr++); - if ((second_byte & 128) != 0) { - return false; - } - info->column = code << 3 | (second_byte >> 4); - info->end_column = info->column + (second_byte & 15); - break; - } - } - if (addr <= addrq && end_addr > addrq) { - return true; - } - addr = end_addr; - } - return false; -} - -/* ============================================================================ - * CODE OBJECT AND FRAME PARSING FUNCTIONS - * ============================================================================ */ - -static PyObject * -make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, - PyObject *func) -{ - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - PyObject *info = PyStructSequence_New(state->FrameInfo_Type); - if (info == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo"); - return NULL; - } - Py_INCREF(file); - Py_INCREF(line); - Py_INCREF(func); - PyStructSequence_SetItem(info, 0, file); - PyStructSequence_SetItem(info, 1, line); - PyStructSequence_SetItem(info, 2, func); - return info; -} - -static int -parse_code_object(RemoteUnwinderObject *unwinder, - PyObject **result, - uintptr_t address, - uintptr_t instruction_pointer, - uintptr_t *previous_frame, - int32_t tlbc_index) -{ - void *key = (void *)address; - CachedCodeMetadata *meta = NULL; - PyObject *func = NULL; - PyObject *file = NULL; - PyObject *linetable = NULL; - -#ifdef Py_GIL_DISABLED - // In free threading builds, code object addresses might have the low bit set - // as a flag, so we need to mask it off to get the real address - uintptr_t real_address = address & (~1); -#else - uintptr_t real_address = address; -#endif - - if (unwinder && unwinder->code_object_cache != NULL) { - meta = _Py_hashtable_get(unwinder->code_object_cache, key); - } - - if (meta == NULL) { - char code_object[SIZEOF_CODE_OBJ]; - if (_Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) - { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read code object"); - goto error; - } - - func = read_py_str(unwinder, - GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); - if (!func) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read function name from code object"); - goto error; - } - - file = read_py_str(unwinder, - GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); - if (!file) { 
- set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read filename from code object"); - goto error; - } - - linetable = read_py_bytes(unwinder, - GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); - if (!linetable) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read linetable from code object"); - goto error; - } - - meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); - if (!meta) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate cached code metadata"); - goto error; - } - - meta->func_name = func; - meta->file_name = file; - meta->linetable = linetable; - meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno); - meta->addr_code_adaptive = real_address + (uintptr_t)unwinder->debug_offsets.code_object.co_code_adaptive; - - if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { - cached_code_metadata_destroy(meta); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache code metadata"); - goto error; - } - - // Ownership transferred to meta - func = NULL; - file = NULL; - linetable = NULL; - } - - uintptr_t ip = instruction_pointer; - ptrdiff_t addrq; - -#ifdef Py_GIL_DISABLED - // Handle thread-local bytecode (TLBC) in free threading builds - if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { - // No TLBC or no unwinder - use main bytecode directly - addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; - goto done_tlbc; - } - - // Try to get TLBC data from cache (we'll get generation from the caller) - TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); - - if (!tlbc_entry) { - // Cache miss - try to read and cache TLBC array - if (!cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to cache TLBC array"); - goto error; - } - tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); - } - - if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { - // Use cached TLBC data - uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); - uintptr_t tlbc_bytecode_addr = entries[tlbc_index]; - - if (tlbc_bytecode_addr != 0) { - // Calculate offset from TLBC bytecode - addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr; - goto done_tlbc; - } - } - - // Fall back to main bytecode - addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; - -done_tlbc: -#else - // Non-free-threaded build, always use the main bytecode - (void)tlbc_index; // Suppress unused parameter warning - (void)unwinder; // Suppress unused parameter warning - addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; -#endif - ; // Empty statement to avoid C23 extension warning - LocationInfo info = {0}; - bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), - meta->first_lineno, &info); - if (!ok) { - info.lineno = -1; - } - - PyObject *lineno = PyLong_FromLong(info.lineno); - if (!lineno) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); - goto error; - } - - PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); - Py_DECREF(lineno); - if (!tuple) { - goto error; - } - - *result = tuple; - return 0; - 
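For reference, the varint scheme decoded by scan_varint() and scan_signed_varint() above packs six payload bits per byte, with bit 6 (0x40) acting as a continuation flag; signed deltas additionally store the sign in the low bit of the decoded value (see InternalDocs/code_objects.md). A minimal Python sketch of the same decoding, illustrative only and not part of this module:

    def scan_varint(data, pos):
        # Each byte carries 6 value bits; bit 6 (0x40) marks continuation.
        byte = data[pos]
        pos += 1
        value = byte & 63
        shift = 0
        while byte & 64:
            byte = data[pos]
            pos += 1
            shift += 6
            value |= (byte & 63) << shift
        return value, pos

    def scan_signed_varint(data, pos):
        # The low bit of the decoded value carries the sign.
        uval, pos = scan_varint(data, pos)
        return (-(uval >> 1) if uval & 1 else uval >> 1), pos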
-error: - Py_XDECREF(func); - Py_XDECREF(file); - Py_XDECREF(linetable); - return -1; -} - -/* ============================================================================ - * STACK CHUNK MANAGEMENT FUNCTIONS - * ============================================================================ */ - -static void -cleanup_stack_chunks(StackChunkList *chunks) -{ - for (size_t i = 0; i < chunks->count; ++i) { - PyMem_RawFree(chunks->chunks[i].local_copy); - } - PyMem_RawFree(chunks->chunks); -} - -static int -process_single_stack_chunk( - RemoteUnwinderObject *unwinder, - uintptr_t chunk_addr, - StackChunkInfo *chunk_info -) { - // Start with default size assumption - size_t current_size = _PY_DATA_STACK_CHUNK_SIZE; - - char *this_chunk = PyMem_RawMalloc(current_size); - if (!this_chunk) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunk buffer"); - return -1; - } - - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, current_size, this_chunk) < 0) { - PyMem_RawFree(this_chunk); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read stack chunk"); - return -1; - } - - // Check actual size and reread if necessary - size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); - if (actual_size != current_size) { - this_chunk = PyMem_RawRealloc(this_chunk, actual_size); - if (!this_chunk) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to reallocate stack chunk buffer"); - return -1; - } - - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, actual_size, this_chunk) < 0) { - PyMem_RawFree(this_chunk); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reread stack chunk with correct size"); - return -1; - } - current_size = actual_size; - } - - chunk_info->remote_addr = chunk_addr; - chunk_info->size = current_size; - chunk_info->local_copy = this_chunk; - return 0; -} - -static int -copy_stack_chunks(RemoteUnwinderObject *unwinder, - uintptr_t tstate_addr, - StackChunkList *out_chunks) -{ - uintptr_t chunk_addr; - StackChunkInfo *chunks = NULL; - size_t count = 0; - size_t max_chunks = 16; - - if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address"); - return -1; - } - - chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); - if (!chunks) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array"); - return -1; - } - - while (chunk_addr != 0) { - // Grow array if needed - if (count >= max_chunks) { - max_chunks *= 2; - StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); - if (!new_chunks) { - PyErr_NoMemory(); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array"); - goto error; - } - chunks = new_chunks; - } - - // Process this chunk - if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk"); - goto error; - } - - // Get next chunk address and increment count - chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous)); - count++; - } - - out_chunks->chunks = chunks; - out_chunks->count = count; - return 0; - -error: - for (size_t i = 0; i < count; ++i) { - PyMem_RawFree(chunks[i].local_copy); - 
} - PyMem_RawFree(chunks); - return -1; -} - -static void * -find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) -{ - for (size_t i = 0; i < chunks->count; ++i) { - uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); - size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); - - if (remote_ptr >= base && remote_ptr < base + payload) { - return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr); - } - } - return NULL; -} - -static int -parse_frame_from_chunks( - RemoteUnwinderObject *unwinder, - PyObject **result, - uintptr_t address, - uintptr_t *previous_frame, - uintptr_t *stackpointer, - StackChunkList *chunks -) { - void *frame_ptr = find_frame_in_chunks(chunks, address); - if (!frame_ptr) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Frame not found in stack chunks"); - return -1; - } - - char *frame = (char *)frame_ptr; - *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - *stackpointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.stackpointer); - uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable); - int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); - if (frame_valid != 1) { - return frame_valid; - } - - uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); - - // Get tlbc_index for free threading builds - int32_t tlbc_index = 0; -#ifdef Py_GIL_DISABLED - if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { - tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); - } -#endif - - return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); -} - -/* ============================================================================ - * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS - * ============================================================================ */ - -static int -populate_initial_state_data( - int all_threads, - RemoteUnwinderObject *unwinder, - uintptr_t runtime_start_address, - uintptr_t *interpreter_state, - uintptr_t *tstate -) { - uintptr_t interpreter_state_list_head = - (uintptr_t)unwinder->debug_offsets.runtime_state.interpreters_head; - - uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &address_of_interpreter_state); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state address"); - return -1; - } - - if (address_of_interpreter_state == 0) { - PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL"); - return -1; - } - - *interpreter_state = address_of_interpreter_state; - - if (all_threads) { - *tstate = 0; - return 0; - } - - uintptr_t address_of_thread = address_of_interpreter_state + - (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main; - - if (_Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address_of_thread, - sizeof(void*), - tstate) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state address"); - return -1; - } - - return 0; -} - - -static int -find_running_frame( - 
RemoteUnwinderObject *unwinder, - uintptr_t address_of_thread, - uintptr_t *frame -) { - if ((void*)address_of_thread != NULL) { - int err = read_ptr( - unwinder, - address_of_thread + (uintptr_t)unwinder->debug_offsets.thread_state.current_frame, - frame); - if (err) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read current frame pointer"); - return -1; - } - return 0; - } - - *frame = (uintptr_t)NULL; - return 0; -} - -/* ============================================================================ - * FRAME PARSING FUNCTIONS - * ============================================================================ */ - -static inline int -is_frame_valid( - RemoteUnwinderObject *unwinder, - uintptr_t frame_addr, - uintptr_t code_object_addr -) { - if ((void*)code_object_addr == NULL) { - return 0; - } - - void* frame = (void*)frame_addr; - - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } - - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR - && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", - GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); - set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame"); - return -1; - } - return 1; -} - -static int -parse_frame_object( - RemoteUnwinderObject *unwinder, - PyObject** result, - uintptr_t address, - uintptr_t* address_of_code_object, - uintptr_t* previous_frame -) { - char frame[SIZEOF_INTERP_FRAME]; - *address_of_code_object = 0; - - Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - address, - SIZEOF_INTERP_FRAME, - frame - ); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame"); - return -1; - } - - *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); - int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object); - if (frame_valid != 1) { - return frame_valid; - } - - uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); - - // Get tlbc_index for free threading builds - int32_t tlbc_index = 0; -#ifdef Py_GIL_DISABLED - if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { - tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); - } -#endif - - *address_of_code_object = code_object; - return parse_code_object(unwinder, result, code_object, instruction_pointer, previous_frame, tlbc_index); -} - -static int - parse_async_frame_chain( - RemoteUnwinderObject *unwinder, - PyObject *calls, - uintptr_t address_of_thread, - uintptr_t running_task_code_obj -) { - uintptr_t address_of_current_frame; - if (find_running_frame(unwinder, address_of_thread, &address_of_current_frame) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Running frame search failed in async chain"); - return -1; - } - - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - uintptr_t address_of_code_object; - int res = parse_frame_object( - unwinder, - &frame_info, - address_of_current_frame, - &address_of_code_object, - 
&address_of_current_frame - ); - - if (res < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Async frame object parsing failed in chain"); - return -1; - } - - if (!frame_info) { - continue; - } - - if (PyList_Append(calls, frame_info) == -1) { - Py_DECREF(frame_info); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame info to async chain"); - return -1; - } - - Py_DECREF(frame_info); - - if (address_of_code_object == running_task_code_obj) { - break; - } - } - - return 0; -} - -/* ============================================================================ - * AWAITED BY PARSING FUNCTIONS - * ============================================================================ */ - -static int -append_awaited_by_for_thread( - RemoteUnwinderObject *unwinder, - uintptr_t head_addr, - PyObject *result -) { - char task_node[SIZEOF_LLIST_NODE]; - - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, - sizeof(task_node), task_node) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read task node head"); - return -1; - } - - size_t iteration_count = 0; - const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound - - while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { - if (++iteration_count > MAX_ITERATIONS) { - PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Task list iteration limit exceeded"); - return -1; - } - - if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { - PyErr_SetString(PyExc_RuntimeError, - "Invalid linked list structure reading remote memory"); - set_exception_cause(unwinder, PyExc_RuntimeError, "NULL pointer in task linked list"); - return -1; - } - - uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) - - (uintptr_t)unwinder->async_debug_offsets.asyncio_task_object.task_node; - - if (process_single_task_node(unwinder, task_addr, NULL, result) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process task node in awaited_by"); - return -1; - } - - // Read next node - if (_Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, - (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), - sizeof(task_node), - task_node) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next task node in awaited_by"); - return -1; - } - } - - return 0; -} - -static int -append_awaited_by( - RemoteUnwinderObject *unwinder, - unsigned long tid, - uintptr_t head_addr, - PyObject *result) -{ - PyObject *tid_py = PyLong_FromUnsignedLong(tid); - if (tid_py == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID object"); - return -1; - } - - PyObject* awaited_by_for_thread = PyList_New(0); - if (awaited_by_for_thread == NULL) { - Py_DECREF(tid_py); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create awaited_by thread list"); - return -1; - } - - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - PyObject *result_item = PyStructSequence_New(state->AwaitedInfo_Type); - if (result_item == NULL) { - Py_DECREF(tid_py); - Py_DECREF(awaited_by_for_thread); - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create AwaitedInfo"); - return -1; - } - - PyStructSequence_SetItem(result_item, 0, tid_py); // steals ref - PyStructSequence_SetItem(result_item, 1, 
awaited_by_for_thread); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by result item"); - return -1; - } - Py_DECREF(result_item); - - if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) - { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append awaited_by for thread"); - return -1; - } - - return 0; -} - -/* ============================================================================ - * STACK UNWINDING FUNCTIONS - * ============================================================================ */ - -static int -process_frame_chain( - RemoteUnwinderObject *unwinder, - uintptr_t initial_frame_addr, - StackChunkList *chunks, - PyObject *frame_info, - uintptr_t gc_frame) -{ - uintptr_t frame_addr = initial_frame_addr; - uintptr_t prev_frame_addr = 0; - const size_t MAX_FRAMES = 1024; - size_t frame_count = 0; - - while ((void*)frame_addr != NULL) { - PyObject *frame = NULL; - uintptr_t next_frame_addr = 0; - uintptr_t stackpointer = 0; - - if (++frame_count > MAX_FRAMES) { - PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); - set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded"); - return -1; - } - - // Try chunks first, fallback to direct memory read - if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, chunks) < 0) { - PyErr_Clear(); - uintptr_t address_of_code_object = 0; - if (parse_frame_object(unwinder, &frame, frame_addr, &address_of_code_object, &next_frame_addr) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain"); - return -1; - } - } - if (frame == NULL && PyList_GET_SIZE(frame_info) == 0) { - // If the first frame is missing, the chain is broken: - const char *e = "Failed to parse initial frame in chain"; - PyErr_SetString(PyExc_RuntimeError, e); - return -1; - } - PyObject *extra_frame = NULL; - // This frame kicked off the current GC collection: - if (unwinder->gc && frame_addr == gc_frame) { - _Py_DECLARE_STR(gc, "<GC>"); - extra_frame = &_Py_STR(gc); - } - // Otherwise, check for native frames to insert: - else if (unwinder->native && - // We've reached an interpreter trampoline frame: - frame == NULL && - // Bottommost frame is always native, so skip that one: - next_frame_addr && - // Only suppress native frames if GC tracking is enabled and the next frame will be a GC frame: - !(unwinder->gc && next_frame_addr == gc_frame)) - { - _Py_DECLARE_STR(native, "<native>"); - extra_frame = &_Py_STR(native); - } - if (extra_frame) { - // Use "~" as file and 0 as line, since that's what pstats uses: - PyObject *extra_frame_info = make_frame_info( - unwinder, _Py_LATIN1_CHR('~'), _PyLong_GetZero(), extra_frame); - if (extra_frame_info == NULL) { - return -1; - } - int error = PyList_Append(frame_info, extra_frame_info); - Py_DECREF(extra_frame_info); - if (error) { - const char *e = "Failed to append extra frame to frame info list"; - set_exception_cause(unwinder, PyExc_RuntimeError, e); - return -1; - } - } - if (frame) { - if (prev_frame_addr && frame_addr != prev_frame_addr) { - const char *f = "Broken frame chain: expected frame at 0x%lx, got 0x%lx"; - PyErr_Format(PyExc_RuntimeError, f, prev_frame_addr, frame_addr); - Py_DECREF(frame); - const char *e = "Frame chain consistency check failed"; - set_exception_cause(unwinder, PyExc_RuntimeError, e); - return -1; - } - - if
(PyList_Append(frame_info, frame) == -1) { - Py_DECREF(frame); - const char *e = "Failed to append frame to frame info list"; - set_exception_cause(unwinder, PyExc_RuntimeError, e); - return -1; - } - Py_DECREF(frame); - } - - prev_frame_addr = next_frame_addr; - frame_addr = next_frame_addr; - } - - return 0; -} - -static int -get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id) { -#if defined(__APPLE__) && TARGET_OS_OSX - if (unwinder->thread_id_offset == 0) { - uint64_t *tids = (uint64_t *)PyMem_Malloc(MAX_NATIVE_THREADS * sizeof(uint64_t)); - if (!tids) { - PyErr_NoMemory(); - return -1; - } - int n = proc_pidinfo(unwinder->handle.pid, PROC_PIDLISTTHREADS, 0, tids, MAX_NATIVE_THREADS * sizeof(uint64_t)) / sizeof(uint64_t); - if (n <= 0) { - PyMem_Free(tids); - return THREAD_STATE_UNKNOWN; - } - uint64_t min_offset = UINT64_MAX; - for (int i = 0; i < n; i++) { - uint64_t offset = tids[i] - pthread_id; - if (offset < min_offset) { - min_offset = offset; - } - } - unwinder->thread_id_offset = min_offset; - PyMem_Free(tids); - } - struct proc_threadinfo ti; - uint64_t tid_with_offset = pthread_id + unwinder->thread_id_offset; - if (proc_pidinfo(unwinder->handle.pid, PROC_PIDTHREADINFO, tid_with_offset, &ti, sizeof(ti)) != sizeof(ti)) { - return THREAD_STATE_UNKNOWN; - } - if (ti.pth_run_state == TH_STATE_RUNNING) { - return THREAD_STATE_RUNNING; - } - return THREAD_STATE_IDLE; -#elif defined(__linux__) - char stat_path[256]; - char buffer[2048] = ""; - - snprintf(stat_path, sizeof(stat_path), "/proc/%d/task/%lu/stat", unwinder->handle.pid, tid); - - int fd = open(stat_path, O_RDONLY); - if (fd == -1) { - return THREAD_STATE_UNKNOWN; - } - - if (read(fd, buffer, 2047) == 0) { - close(fd); - return THREAD_STATE_UNKNOWN; - } - close(fd); - - char *p = strchr(buffer, ')'); - if (!p) { - return THREAD_STATE_UNKNOWN; - } - - p += 2; // Skip ") " - if (*p == ' ') { - p++; - } - - switch (*p) { - case 'R': // Running - return THREAD_STATE_RUNNING; - case 'S': // Interruptible sleep - case 'D': // Uninterruptible sleep - case 'T': // Stopped - case 'Z': // Zombie - case 'I': // Idle kernel thread - return THREAD_STATE_IDLE; - default: - return THREAD_STATE_UNKNOWN; - } -#elif defined(MS_WINDOWS) - ULONG n; - NTSTATUS status = NtQuerySystemInformation( - SystemProcessInformation, - unwinder->win_process_buffer, - unwinder->win_process_buffer_size, - &n - ); - if (status == STATUS_INFO_LENGTH_MISMATCH) { - // Buffer was too small so we reallocate a larger one and try again. - unwinder->win_process_buffer_size = n; - PVOID new_buffer = PyMem_Realloc(unwinder->win_process_buffer, n); - if (!new_buffer) { - return -1; - } - unwinder->win_process_buffer = new_buffer; - return get_thread_status(unwinder, tid, pthread_id); - } - if (status != STATUS_SUCCESS) { - return -1; - } - - SYSTEM_PROCESS_INFORMATION *pi = (SYSTEM_PROCESS_INFORMATION *)unwinder->win_process_buffer; - while ((ULONG)(ULONG_PTR)pi->UniqueProcessId != unwinder->handle.pid) { - if (pi->NextEntryOffset == 0) { - // We didn't find the process - return -1; - } - pi = (SYSTEM_PROCESS_INFORMATION *)(((BYTE *)pi) + pi->NextEntryOffset); - } - - SYSTEM_THREAD_INFORMATION *ti = (SYSTEM_THREAD_INFORMATION *)((char *)pi + sizeof(SYSTEM_PROCESS_INFORMATION)); - for (size_t i = 0; i < pi->NumberOfThreads; i++, ti++) { - if (ti->ClientId.UniqueThread == (HANDLE)tid) { - return ti->ThreadState != WIN32_THREADSTATE_RUNNING ? 
THREAD_STATE_IDLE : THREAD_STATE_RUNNING; - } - } - - return -1; -#else - return THREAD_STATE_UNKNOWN; -#endif -} - -typedef struct { - unsigned int initialized:1; - unsigned int bound:1; - unsigned int unbound:1; - unsigned int bound_gilstate:1; - unsigned int active:1; - unsigned int finalizing:1; - unsigned int cleared:1; - unsigned int finalized:1; - unsigned int :24; -} _thread_status; - -static PyObject* -unwind_stack_for_thread( - RemoteUnwinderObject *unwinder, - uintptr_t *current_tstate, - uintptr_t gil_holder_tstate, - uintptr_t gc_frame -) { - PyObject *frame_info = NULL; - PyObject *thread_id = NULL; - PyObject *result = NULL; - StackChunkList chunks = {0}; - - char ts[SIZEOF_THREAD_STATE]; - int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( - &unwinder->handle, *current_tstate, (size_t)unwinder->debug_offsets.thread_state.size, ts); - if (bytes_read < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread state"); - goto error; - } - - long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); - - // Read GC collecting state from the interpreter (before any skip checks) - uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp); - - // Read the GC runtime state from the interpreter state - uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc; - char gc_state[SIZEOF_GC_RUNTIME_STATE]; - if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state"); - goto error; - } - - // Calculate thread status using flags (always) - int status_flags = 0; - - // Check GIL status - int has_gil = 0; - int gil_requested = 0; -#ifdef Py_GIL_DISABLED - int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; - has_gil = active; -#else - // Read holds_gil directly from thread state - has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil); - - // Check if thread is actively requesting the GIL - if (unwinder->debug_offsets.thread_state.gil_requested != 0) { - gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested); - } - - // Set GIL_REQUESTED flag if thread is waiting - if (!has_gil && gil_requested) { - status_flags |= THREAD_STATUS_GIL_REQUESTED; - } -#endif - if (has_gil) { - status_flags |= THREAD_STATUS_HAS_GIL; - } - - // Assert that we never have both HAS_GIL and GIL_REQUESTED set at the same time - // This would indicate a race condition in the GIL state tracking - assert(!(has_gil && gil_requested)); - - // Check CPU status - long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); - int cpu_status = get_thread_status(unwinder, tid, pthread_id); - if (cpu_status == -1) { - status_flags |= THREAD_STATUS_UNKNOWN; - } else if (cpu_status == THREAD_STATE_RUNNING) { - status_flags |= THREAD_STATUS_ON_CPU; - } - - // Check if we should skip this thread based on mode - int should_skip = 0; - if (unwinder->skip_non_matching_threads) { - if (unwinder->mode == PROFILING_MODE_CPU) { - // Skip if not on CPU - should_skip = !(status_flags & THREAD_STATUS_ON_CPU); - } else if (unwinder->mode == PROFILING_MODE_GIL) { - // Skip if doesn't have GIL - should_skip = !(status_flags & THREAD_STATUS_HAS_GIL); - } - // PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip - } - - if (should_skip) { - // Advance to next thread and 
return NULL to skip processing - *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); - return NULL; - } - - uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); - - frame_info = PyList_New(0); - if (!frame_info) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frame info list"); - goto error; - } - - if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks"); - goto error; - } - - if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame) < 0) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); - goto error; - } - - *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); - - thread_id = PyLong_FromLongLong(tid); - if (thread_id == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); - goto error; - } - - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - result = PyStructSequence_New(state->ThreadInfo_Type); - if (result == NULL) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create ThreadInfo"); - goto error; - } - - // Always use status_flags - PyObject *py_status = PyLong_FromLong(status_flags); - if (py_status == NULL) { - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status"); - goto error; - } - - // py_status contains status flags (bitfield) - PyStructSequence_SetItem(result, 0, thread_id); - PyStructSequence_SetItem(result, 1, py_status); // Steals reference - PyStructSequence_SetItem(result, 2, frame_info); // Steals reference - - cleanup_stack_chunks(&chunks); - return result; - -error: - Py_XDECREF(frame_info); - Py_XDECREF(thread_id); - Py_XDECREF(result); - cleanup_stack_chunks(&chunks); - return NULL; -} - - -/* ============================================================================ - * REMOTEUNWINDER CLASS IMPLEMENTATION - * ============================================================================ */ - -/*[clinic input] -class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=55f164d8803318be]*/ - -/*[clinic input] -@permit_long_summary -@permit_long_docstring_body -_remote_debugging.RemoteUnwinder.__init__ - pid: int - * - all_threads: bool = False - only_active_thread: bool = False - mode: int = 0 - debug: bool = False - skip_non_matching_threads: bool = True - native: bool = False - gc: bool = False - -Initialize a new RemoteUnwinder object for debugging a remote Python process. - -Args: - pid: Process ID of the target Python process to debug - all_threads: If True, initialize state for all threads in the process. - If False, only initialize for the main thread. - only_active_thread: If True, only sample the thread holding the GIL. - mode: Profiling mode: 0=WALL (wall-time), 1=CPU (cpu-time), 2=GIL (gil-time). - Cannot be used together with all_threads=True. - debug: If True, chain exceptions to explain the sequence of events that - lead to the exception. - skip_non_matching_threads: If True, skip threads that don't match the selected mode. - If False, include all threads regardless of mode. - native: If True, include artificial "<native>" frames to denote calls to - non-Python code.
- gc: If True, include artificial "<GC>" frames to denote active garbage - collection. - -The RemoteUnwinder provides functionality to inspect and debug a running Python -process, including examining thread states, stack frames and other runtime data. - -Raises: - PermissionError: If access to the target process is denied - OSError: If unable to attach to the target process or access its memory - RuntimeError: If unable to read debug information from the target process - ValueError: If both all_threads and only_active_thread are True -[clinic start generated code]*/ - -static int -_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, - int pid, int all_threads, - int only_active_thread, - int mode, int debug, - int skip_non_matching_threads, - int native, int gc) -/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/ -{ - // Validate that all_threads and only_active_thread are not both True - if (all_threads && only_active_thread) { - PyErr_SetString(PyExc_ValueError, - "all_threads and only_active_thread cannot both be True"); - return -1; - } - -#ifdef Py_GIL_DISABLED - if (only_active_thread) { - PyErr_SetString(PyExc_ValueError, - "only_active_thread is not supported on free-threaded (Py_GIL_DISABLED) builds"); - return -1; - } -#endif - - self->native = native; - self->gc = gc; - self->debug = debug; - self->only_active_thread = only_active_thread; - self->mode = mode; - self->skip_non_matching_threads = skip_non_matching_threads; - self->cached_state = NULL; - if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to initialize process handle"); - return -1; - } - - self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle); - if (self->runtime_start_address == 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to get Python runtime address"); - return -1; - } - - if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle, - &self->runtime_start_address, - &self->debug_offsets) < 0) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to read debug offsets"); - return -1; - } - - // Validate that the debug offsets are valid - if (validate_debug_offsets(&self->debug_offsets) == -1) { - set_exception_cause(self, PyExc_RuntimeError, "Invalid debug offsets found"); - return -1; - } - - // Try to read async debug offsets, but don't fail if they're not available - self->async_debug_offsets_available = 1; - if (read_async_debug(self) < 0) { - PyErr_Clear(); - memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets)); - self->async_debug_offsets_available = 0; - } - - if (populate_initial_state_data(all_threads, self, self->runtime_start_address, - &self->interpreter_addr, &self->tstate_addr) < 0) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to populate initial state data"); - return -1; - } - - self->code_object_cache = _Py_hashtable_new_full( - _Py_hashtable_hash_ptr, - _Py_hashtable_compare_direct, - NULL, // keys are stable pointers, don't destroy - cached_code_metadata_destroy, - NULL - ); - if (self->code_object_cache == NULL) { - PyErr_NoMemory(); - set_exception_cause(self, PyExc_MemoryError, "Failed to create code object cache"); - return -1; - } - -#ifdef Py_GIL_DISABLED - // Initialize TLBC cache - self->tlbc_generation = 0; - self->tlbc_cache = _Py_hashtable_new_full( - _Py_hashtable_hash_ptr, - _Py_hashtable_compare_direct, - NULL, // keys are stable pointers, don't destroy - tlbc_cache_entry_destroy, - NULL - ); - if
(self->tlbc_cache == NULL) { - _Py_hashtable_destroy(self->code_object_cache); - PyErr_NoMemory(); - set_exception_cause(self, PyExc_MemoryError, "Failed to create TLBC cache"); - return -1; - } -#endif - -#if defined(__APPLE__) - self->thread_id_offset = 0; -#endif - -#ifdef MS_WINDOWS - self->win_process_buffer = NULL; - self->win_process_buffer_size = 0; -#endif - - return 0; -} - -/*[clinic input] -@permit_long_docstring_body -@critical_section -_remote_debugging.RemoteUnwinder.get_stack_trace - -Returns stack traces for all interpreters and threads in the process. - -Each element in the returned list is a tuple of (interpreter_id, thread_list), where: -- interpreter_id is the interpreter identifier -- thread_list is a list of tuples (thread_id, frame_list) for threads in that interpreter - - thread_id is the OS thread identifier - - frame_list is a list of tuples (function_name, filename, line_number) representing - the Python stack frames for that thread, ordered from most recent to oldest - -The threads returned depend on the initialization parameters: -- If only_active_thread was True: returns only the thread holding the GIL across all interpreters -- If all_threads was True: returns all threads across all interpreters -- Otherwise: returns only the main thread of each interpreter - -Example: - [ - (0, [ # Main interpreter - (1234, [ - ('process_data', 'worker.py', 127), - ('run_worker', 'worker.py', 45), - ('main', 'app.py', 23) - ]), - (1235, [ - ('handle_request', 'server.py', 89), - ('serve_forever', 'server.py', 52) - ]) - ]), - (1, [ # Sub-interpreter - (1236, [ - ('sub_worker', 'sub.py', 15) - ]) - ]) - ] - -Raises: - RuntimeError: If there is an error copying memory from the target process - OSError: If there is an error accessing the target process - PermissionError: If access to the target process is denied - UnicodeDecodeError: If there is an error decoding strings from the target process - -[clinic start generated code]*/ - -static PyObject * -_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self) -/*[clinic end generated code: output=666192b90c69d567 input=bcff01c73cccc1c0]*/ -{ - PyObject* result = PyList_New(0); - if (!result) { - set_exception_cause(self, PyExc_MemoryError, "Failed to create stack trace result list"); - return NULL; - } - - // Iterate over all interpreters - uintptr_t current_interpreter = self->interpreter_addr; - while (current_interpreter != 0) { - // Read interpreter state to get the interpreter ID - char interp_state_buffer[INTERP_STATE_BUFFER_SIZE]; - if (_Py_RemoteDebug_PagedReadRemoteMemory( - &self->handle, - current_interpreter, - INTERP_STATE_BUFFER_SIZE, - interp_state_buffer) < 0) { - set_exception_cause(self, PyExc_RuntimeError, "Failed to read interpreter state buffer"); - Py_CLEAR(result); - goto exit; - } - - uintptr_t gc_frame = 0; - if (self->gc) { - gc_frame = GET_MEMBER(uintptr_t, interp_state_buffer, - self->debug_offsets.interpreter_state.gc - + self->debug_offsets.gc.frame); - } - - int64_t interpreter_id = GET_MEMBER(int64_t, interp_state_buffer, - self->debug_offsets.interpreter_state.id); - - // Get code object generation from buffer - uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer, - self->debug_offsets.interpreter_state.code_object_generation); - - if (code_object_generation != self->code_object_generation) { - self->code_object_generation = code_object_generation; - _Py_hashtable_clear(self->code_object_cache); - } - -#ifdef Py_GIL_DISABLED - // Check TLBC generation
and invalidate cache if needed - uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer, - self->debug_offsets.interpreter_state.tlbc_generation); - if (current_tlbc_generation != self->tlbc_generation) { - self->tlbc_generation = current_tlbc_generation; - _Py_hashtable_clear(self->tlbc_cache); - } -#endif - - // Create a list to hold threads for this interpreter - PyObject *interpreter_threads = PyList_New(0); - if (!interpreter_threads) { - set_exception_cause(self, PyExc_MemoryError, "Failed to create interpreter threads list"); - Py_CLEAR(result); - goto exit; - } - - // Get the GIL holder for this interpreter (needed for GIL_WAIT logic) - uintptr_t gil_holder_tstate = 0; - int gil_locked = GET_MEMBER(int, interp_state_buffer, - self->debug_offsets.interpreter_state.gil_runtime_state_locked); - if (gil_locked) { - gil_holder_tstate = (uintptr_t)GET_MEMBER(PyThreadState*, interp_state_buffer, - self->debug_offsets.interpreter_state.gil_runtime_state_holder); - } - - uintptr_t current_tstate; - if (self->only_active_thread) { - // Find the GIL holder for THIS interpreter - if (!gil_locked) { - // This interpreter's GIL is not locked, skip it - Py_DECREF(interpreter_threads); - goto next_interpreter; - } - - current_tstate = gil_holder_tstate; - } else if (self->tstate_addr == 0) { - // Get all threads for this interpreter - current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer, - self->debug_offsets.interpreter_state.threads_head); - } else { - // Target specific thread (only process first interpreter) - current_tstate = self->tstate_addr; - } - - while (current_tstate != 0) { - PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate, - gil_holder_tstate, - gc_frame); - if (!frame_info) { - // Check if this was an intentional skip due to mode-based filtering - if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) { - // Thread was skipped due to mode filtering, continue to next thread - continue; - } - // This was an actual error - Py_DECREF(interpreter_threads); - set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread"); - Py_CLEAR(result); - goto exit; - } - - if (PyList_Append(interpreter_threads, frame_info) == -1) { - Py_DECREF(frame_info); - Py_DECREF(interpreter_threads); - set_exception_cause(self, PyExc_RuntimeError, "Failed to append thread frame info"); - Py_CLEAR(result); - goto exit; - } - Py_DECREF(frame_info); - - // If targeting specific thread or only active thread, process just one - if (self->tstate_addr || self->only_active_thread) { - break; - } - } - - // Create the InterpreterInfo StructSequence - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)self); - PyObject *interpreter_info = PyStructSequence_New(state->InterpreterInfo_Type); - if (!interpreter_info) { - Py_DECREF(interpreter_threads); - set_exception_cause(self, PyExc_MemoryError, "Failed to create InterpreterInfo"); - Py_CLEAR(result); - goto exit; - } - - PyObject *interp_id = PyLong_FromLongLong(interpreter_id); - if (!interp_id) { - Py_DECREF(interpreter_threads); - Py_DECREF(interpreter_info); - set_exception_cause(self, PyExc_MemoryError, "Failed to create interpreter ID"); - Py_CLEAR(result); - goto exit; - } - - PyStructSequence_SetItem(interpreter_info, 0, interp_id); // steals reference - PyStructSequence_SetItem(interpreter_info, 1, interpreter_threads); // steals reference - - // Add this interpreter to the result list - if (PyList_Append(result, interpreter_info) == 
-1) { - Py_DECREF(interpreter_info); - set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter info"); - Py_CLEAR(result); - goto exit; - } - Py_DECREF(interpreter_info); - -next_interpreter: - - // Get the next interpreter address - current_interpreter = GET_MEMBER(uintptr_t, interp_state_buffer, - self->debug_offsets.interpreter_state.next); - - // If we're targeting a specific thread, stop after first interpreter - if (self->tstate_addr != 0) { - break; - } - } - -exit: - _Py_RemoteDebug_ClearCache(&self->handle); - return result; -} - -/*[clinic input] -@permit_long_summary -@permit_long_docstring_body -@critical_section -_remote_debugging.RemoteUnwinder.get_all_awaited_by - -Get all tasks and their awaited_by relationships from the remote process. - -This provides a tree structure showing which tasks are waiting for other tasks. - -For each task, returns: -1. The call stack frames leading to where the task is currently executing -2. The name of the task -3. A list of tasks that this task is waiting for, with their own frames/names/etc - -Returns a list of [frames, task_name, subtasks] where: -- frames: List of (func_name, filename, lineno) showing the call stack -- task_name: String identifier for the task -- subtasks: List of tasks being awaited by this task, in the same format - -Raises: - RuntimeError: If AsyncioDebug section is not available in the remote process - MemoryError: If memory allocation fails - OSError: If reading from the remote process fails - -Example output: -[ - # Task c2_root waiting for two subtasks - [ - # Call stack of c2_root - [("c5", "script.py", 10), ("c4", "script.py", 14)], - "c2_root", - [ - # First subtask (sub_main_2) and what it's waiting for - [ - [("c1", "script.py", 23)], - "sub_main_2", - [...] - ], - # Second subtask and its waiters - [...] - ] - ] -] -[clinic start generated code]*/ - -static PyObject * -_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self) -/*[clinic end generated code: output=6a49cd345e8aec53 input=307f754cbe38250c]*/ -{ - if (!self->async_debug_offsets_available) { - PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); - set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_all_awaited_by"); - return NULL; - } - - PyObject *result = PyList_New(0); - if (result == NULL) { - set_exception_cause(self, PyExc_MemoryError, "Failed to create awaited_by result list"); - goto result_err; - } - - // Process all threads - if (iterate_threads(self, process_thread_for_awaited_by, result) < 0) { - goto result_err; - } - - uintptr_t head_addr = self->interpreter_addr - + (uintptr_t)self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; - - // On top of the per-thread task lists used by default by asyncio to avoid - // contention, there is also a fallback per-interpreter list of tasks; - // any tasks still pending when a thread is destroyed will be moved to the - // per-interpreter task list. It's unlikely we'll find anything here, but - // interesting for debugging.
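As a usage sketch for the asyncio inspection entry point documented above (hypothetical: assumes a running target `pid`, and indexes the AwaitedInfo and TaskInfo struct sequences exactly as they are populated in this file; not a tested recipe):

    import sys
    from _remote_debugging import RemoteUnwinder

    pid = int(sys.argv[1])  # hypothetical target process ID
    unwinder = RemoteUnwinder(pid, debug=True)
    for awaited in unwinder.get_all_awaited_by():
        thread_id, tasks = awaited[0], awaited[1]  # AwaitedInfo: (thread_id, tasks)
        for task in tasks:
            # TaskInfo items: 0=task id, 1=name, 2=coroutine stack, 3=awaited_by
            print(f"{thread_id}: {task[1]} ({task[0]:#x})")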
- if (append_awaited_by(self, 0, head_addr, result)) - { - set_exception_cause(self, PyExc_RuntimeError, "Failed to append interpreter awaited_by in get_all_awaited_by"); - goto result_err; - } - - _Py_RemoteDebug_ClearCache(&self->handle); - return result; - -result_err: - _Py_RemoteDebug_ClearCache(&self->handle); - Py_XDECREF(result); - return NULL; -} - -/*[clinic input] -@permit_long_summary -@permit_long_docstring_body -@critical_section -_remote_debugging.RemoteUnwinder.get_async_stack_trace - -Get the currently running async tasks and their dependency graphs from the remote process. - -This returns information about running tasks and all tasks that are waiting for them, -forming a complete dependency graph for each thread's active task. - -For each thread with a running task, returns the running task plus all tasks that -transitively depend on it (tasks waiting for the running task, tasks waiting for -those tasks, etc.). - -Returns a list of per-thread results, where each thread result contains: -- Thread ID -- List of task information for the running task and all its waiters - -Each task info contains: -- Task ID (memory address) -- Task name -- Call stack frames: List of (func_name, filename, lineno) -- List of tasks waiting for this task (recursive structure) - -Raises: - RuntimeError: If AsyncioDebug section is not available in the target process - MemoryError: If memory allocation fails - OSError: If reading from the remote process fails - -Example output (similar structure to get_all_awaited_by but only for running tasks): -[ - # Thread 140234 results - (140234, [ - # Running task and its complete waiter dependency graph - (4345585712, 'main_task', - [("run_server", "server.py", 127), ("main", "app.py", 23)], - [ - # Tasks waiting for main_task - (4345585800, 'worker_1', [...], [...]), - (4345585900, 'worker_2', [...], [...]) - ]) - ]) -] - -[clinic start generated code]*/ - -static PyObject * -_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) -/*[clinic end generated code: output=6433d52b55e87bbe input=6129b7d509a887c9]*/ -{ - if (!self->async_debug_offsets_available) { - PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); - set_exception_cause(self, PyExc_RuntimeError, "AsyncioDebug section unavailable in get_async_stack_trace"); - return NULL; - } - - PyObject *result = PyList_New(0); - if (result == NULL) { - set_exception_cause(self, PyExc_MemoryError, "Failed to create result list in get_async_stack_trace"); - return NULL; - } - - // Process all threads - if (iterate_threads(self, process_thread_for_async_stack_trace, result) < 0) { - goto result_err; - } - - _Py_RemoteDebug_ClearCache(&self->handle); - return result; -result_err: - _Py_RemoteDebug_ClearCache(&self->handle); - Py_XDECREF(result); - return NULL; -} - -static PyMethodDef RemoteUnwinder_methods[] = { - _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF - _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF - _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF - {NULL, NULL} -}; - -static void -RemoteUnwinder_dealloc(PyObject *op) -{ - RemoteUnwinderObject *self = RemoteUnwinder_CAST(op); - PyTypeObject *tp = Py_TYPE(self); - if (self->code_object_cache) { - _Py_hashtable_destroy(self->code_object_cache); - } -#ifdef MS_WINDOWS - if(self->win_process_buffer != NULL) { - PyMem_Free(self->win_process_buffer); - } -#endif - -#ifdef Py_GIL_DISABLED - if (self->tlbc_cache) { - _Py_hashtable_destroy(self->tlbc_cache); - } 
-#endif - if (self->handle.pid != 0) { - _Py_RemoteDebug_ClearCache(&self->handle); - _Py_RemoteDebug_CleanupProcHandle(&self->handle); - } - PyObject_Del(self); - Py_DECREF(tp); -} - -static PyType_Slot RemoteUnwinder_slots[] = { - {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."}, - {Py_tp_methods, RemoteUnwinder_methods}, - {Py_tp_init, _remote_debugging_RemoteUnwinder___init__}, - {Py_tp_dealloc, RemoteUnwinder_dealloc}, - {0, NULL} -}; - -static PyType_Spec RemoteUnwinder_spec = { - .name = "_remote_debugging.RemoteUnwinder", - .basicsize = sizeof(RemoteUnwinderObject), - .flags = ( - Py_TPFLAGS_DEFAULT - | Py_TPFLAGS_IMMUTABLETYPE - ), - .slots = RemoteUnwinder_slots, -}; - -/* ============================================================================ - * MODULE INITIALIZATION - * ============================================================================ */ - -static int -_remote_debugging_exec(PyObject *m) -{ - RemoteDebuggingState *st = RemoteDebugging_GetState(m); -#define CREATE_TYPE(mod, type, spec) \ - do { \ - type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \ - if (type == NULL) { \ - return -1; \ - } \ - } while (0) - - CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec); - - if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { - return -1; - } - - // Initialize structseq types - st->TaskInfo_Type = PyStructSequence_NewType(&TaskInfo_desc); - if (st->TaskInfo_Type == NULL) { - return -1; - } - if (PyModule_AddType(m, st->TaskInfo_Type) < 0) { - return -1; - } - - st->FrameInfo_Type = PyStructSequence_NewType(&FrameInfo_desc); - if (st->FrameInfo_Type == NULL) { - return -1; - } - if (PyModule_AddType(m, st->FrameInfo_Type) < 0) { - return -1; - } - - st->CoroInfo_Type = PyStructSequence_NewType(&CoroInfo_desc); - if (st->CoroInfo_Type == NULL) { - return -1; - } - if (PyModule_AddType(m, st->CoroInfo_Type) < 0) { - return -1; - } - - st->ThreadInfo_Type = PyStructSequence_NewType(&ThreadInfo_desc); - if (st->ThreadInfo_Type == NULL) { - return -1; - } - if (PyModule_AddType(m, st->ThreadInfo_Type) < 0) { - return -1; - } - - st->InterpreterInfo_Type = PyStructSequence_NewType(&InterpreterInfo_desc); - if (st->InterpreterInfo_Type == NULL) { - return -1; - } - if (PyModule_AddType(m, st->InterpreterInfo_Type) < 0) { - return -1; - } - - st->AwaitedInfo_Type = PyStructSequence_NewType(&AwaitedInfo_desc); - if (st->AwaitedInfo_Type == NULL) { - return -1; - } - if (PyModule_AddType(m, st->AwaitedInfo_Type) < 0) { - return -1; - } -#ifdef Py_GIL_DISABLED - PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); -#endif - int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); - if (rc < 0) { - return -1; - } - - // Add thread status flag constants - if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) { - return -1; - } - if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) { - return -1; - } - if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) { - return -1; - } - if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) { - return -1; - } - - if (RemoteDebugging_InitState(st) < 0) { - return -1; - } - return 0; -} - -static int -remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) -{ - RemoteDebuggingState *state = RemoteDebugging_GetState(mod); - Py_VISIT(state->RemoteDebugging_Type); - Py_VISIT(state->TaskInfo_Type); - 
-    Py_VISIT(state->FrameInfo_Type);
-    Py_VISIT(state->CoroInfo_Type);
-    Py_VISIT(state->ThreadInfo_Type);
-    Py_VISIT(state->InterpreterInfo_Type);
-    Py_VISIT(state->AwaitedInfo_Type);
-    return 0;
-}
-
-static int
-remote_debugging_clear(PyObject *mod)
-{
-    RemoteDebuggingState *state = RemoteDebugging_GetState(mod);
-    Py_CLEAR(state->RemoteDebugging_Type);
-    Py_CLEAR(state->TaskInfo_Type);
-    Py_CLEAR(state->FrameInfo_Type);
-    Py_CLEAR(state->CoroInfo_Type);
-    Py_CLEAR(state->ThreadInfo_Type);
-    Py_CLEAR(state->InterpreterInfo_Type);
-    Py_CLEAR(state->AwaitedInfo_Type);
-    return 0;
-}
-
-static void
-remote_debugging_free(void *mod)
-{
-    (void)remote_debugging_clear((PyObject *)mod);
-}
-
-static PyModuleDef_Slot remote_debugging_slots[] = {
-    {Py_mod_exec, _remote_debugging_exec},
-    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
-    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
-    {0, NULL},
-};
-
-static PyMethodDef remote_debugging_methods[] = {
-    {NULL, NULL, 0, NULL},
-};
-
-static struct PyModuleDef remote_debugging_module = {
-    PyModuleDef_HEAD_INIT,
-    .m_name = "_remote_debugging",
-    .m_size = sizeof(RemoteDebuggingState),
-    .m_methods = remote_debugging_methods,
-    .m_slots = remote_debugging_slots,
-    .m_traverse = remote_debugging_traverse,
-    .m_clear = remote_debugging_clear,
-    .m_free = remote_debugging_free,
-};
-
-PyMODINIT_FUNC
-PyInit__remote_debugging(void)
-{
-    return PyModuleDef_Init(&remote_debugging_module);
-}
diff --git a/Modules/_sqlite/module.c b/Modules/_sqlite/module.c
index 5464fd1227a..831dd9219f7 100644
--- a/Modules/_sqlite/module.c
+++ b/Modules/_sqlite/module.c
@@ -774,7 +774,6 @@ module_exec(PyObject *module)
     return 0;
 
 error:
-    sqlite3_shutdown();
     return -1;
 }
 
diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index 4e97101b699..59ff9078e6c 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -2841,20 +2841,25 @@ scanner_dealloc(PyObject *self)
 static int
 scanner_begin(ScannerObject* self)
 {
-    if (self->executing) {
+#ifdef Py_GIL_DISABLED
+    int was_executing = _Py_atomic_exchange_int(&self->executing, 1);
+#else
+    int was_executing = self->executing;
+    self->executing = 1;
+#endif
+    if (was_executing) {
         PyErr_SetString(PyExc_ValueError,
                         "regular expression scanner already executing");
         return 0;
     }
-    self->executing = 1;
     return 1;
 }
 
 static void
 scanner_end(ScannerObject* self)
 {
-    assert(self->executing);
-    self->executing = 0;
+    assert(FT_ATOMIC_LOAD_INT_RELAXED(self->executing));
+    FT_ATOMIC_STORE_INT(self->executing, 0);
 }
 
 /*[clinic input]
diff --git a/Modules/_ssl.c b/Modules/_ssl.c
index 4b75e455f40..25fcea6aaf1 100644
--- a/Modules/_ssl.c
+++ b/Modules/_ssl.c
@@ -1437,14 +1437,14 @@ _get_peer_alt_names (_sslmodulestate *state, X509 *certificate) {
             }
             PyTuple_SET_ITEM(t, 0, v);
 
-            if (name->d.ip->length == 4) {
-                unsigned char *p = name->d.ip->data;
+            if (ASN1_STRING_length(name->d.ip) == 4) {
+                const unsigned char *p = ASN1_STRING_get0_data(name->d.ip);
                 v = PyUnicode_FromFormat(
                     "%d.%d.%d.%d",
                     p[0], p[1], p[2], p[3]
                 );
-            } else if (name->d.ip->length == 16) {
-                unsigned char *p = name->d.ip->data;
+            } else if (ASN1_STRING_length(name->d.ip) == 16) {
+                const unsigned char *p = ASN1_STRING_get0_data(name->d.ip);
                 v = PyUnicode_FromFormat(
                     "%X:%X:%X:%X:%X:%X:%X:%X",
                     p[0] << 8 | p[1],
@@ -1575,8 +1575,9 @@ _get_aia_uri(X509 *certificate, int nid) {
             continue;
         }
         uri = ad->location->d.uniformResourceIdentifier;
-        ostr = PyUnicode_FromStringAndSize((char *)uri->data,
-                                           uri->length);
+        ostr = PyUnicode_FromStringAndSize(
+            (const char *)ASN1_STRING_get0_data(uri),
+            ASN1_STRING_length(uri));
         if (ostr == NULL) {
             goto fail;
         }
@@ -1642,8 +1643,9 @@ _get_crl_dp(X509 *certificate) {
             continue;
         }
         uri = gn->d.uniformResourceIdentifier;
-        ouri = PyUnicode_FromStringAndSize((char *)uri->data,
-                                           uri->length);
+        ouri = PyUnicode_FromStringAndSize(
+            (const char *)ASN1_STRING_get0_data(uri),
+            ASN1_STRING_length(uri));
         if (ouri == NULL)
             goto done;
 
diff --git a/Modules/_testcapi/module.c b/Modules/_testcapi/module.c
index 9349445351e..7b5861bc08e 100644
--- a/Modules/_testcapi/module.c
+++ b/Modules/_testcapi/module.c
@@ -27,6 +27,8 @@ module_from_slots_name(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_name, "currently ignored..."},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return PyModule_FromSlotsAndSpec(slots, spec);
@@ -37,6 +39,8 @@ module_from_slots_doc(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_doc, "the docstring"},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return PyModule_FromSlotsAndSpec(slots, spec);
@@ -47,6 +51,8 @@ module_from_slots_size(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_state_size, (void*)123},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     PyObject *mod = PyModule_FromSlotsAndSpec(slots, spec);
@@ -72,6 +78,8 @@ module_from_slots_methods(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_methods, a_methoddef_array},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return PyModule_FromSlotsAndSpec(slots, spec);
@@ -90,6 +98,8 @@ module_from_slots_gc(PyObject *self, PyObject *spec)
         {Py_mod_state_traverse, noop_traverse},
         {Py_mod_state_clear, noop_clear},
         {Py_mod_state_free, noop_free},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     PyObject *mod = PyModule_FromSlotsAndSpec(slots, spec);
@@ -118,6 +128,8 @@ module_from_slots_token(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_token, (void*)&test_token},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     PyObject *mod = PyModule_FromSlotsAndSpec(slots, spec);
@@ -144,6 +156,8 @@ module_from_slots_exec(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_exec, simple_exec},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     PyObject *mod = PyModule_FromSlotsAndSpec(slots, spec);
@@ -175,6 +189,8 @@ module_from_slots_create(PyObject *self, PyObject *spec)
 {
     PyModuleDef_Slot slots[] = {
         {Py_mod_create, create_attr_from_spec},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return PyModule_FromSlotsAndSpec(slots, spec);
@@ -205,6 +221,8 @@ module_from_slots_repeat_slot(PyObject *self, PyObject *spec)
     PyModuleDef_Slot slots[] = {
         {slot_id, "anything"},
         {slot_id, "anything else"},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return PyModule_FromSlotsAndSpec(slots, spec);
@@ -219,6 +237,8 @@ module_from_slots_null_slot(PyObject *self, PyObject *spec)
     }
     PyModuleDef_Slot slots[] = {
         {slot_id, NULL},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return PyModule_FromSlotsAndSpec(slots, spec);
@@ -233,6 +253,8 @@ module_from_def_slot(PyObject *self, PyObject *spec)
     }
     PyModuleDef_Slot slots[] = {
         {slot_id, "anything"},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     PyModuleDef def = {
@@ -280,6 +302,7 @@ module_from_def_multiple_exec(PyObject *self, PyObject *spec)
     static PyModuleDef_Slot slots[] = {
         {Py_mod_exec, simple_exec},
         {Py_mod_exec, another_exec},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
         {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
diff --git a/Modules/_testcapi/object.c b/Modules/_testcapi/object.c
index 798ef97c495..4c9632c07a9 100644
--- a/Modules/_testcapi/object.c
+++ b/Modules/_testcapi/object.c
@@ -485,6 +485,30 @@ is_uniquely_referenced(PyObject *self, PyObject *op)
 }
 
 
+static PyObject *
+pyobject_dump(PyObject *self, PyObject *args)
+{
+    PyObject *op;
+    int release_gil = 0;
+
+    if (!PyArg_ParseTuple(args, "O|i", &op, &release_gil)) {
+        return NULL;
+    }
+    NULLABLE(op);
+
+    if (release_gil) {
+        Py_BEGIN_ALLOW_THREADS
+        PyUnstable_Object_Dump(op);
+        Py_END_ALLOW_THREADS
+
+    }
+    else {
+        PyUnstable_Object_Dump(op);
+    }
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef test_methods[] = {
     {"call_pyobject_print", call_pyobject_print, METH_VARARGS},
     {"pyobject_print_null", pyobject_print_null, METH_VARARGS},
@@ -511,6 +535,7 @@ static PyMethodDef test_methods[] = {
     {"test_py_is_funcs", test_py_is_funcs, METH_NOARGS},
     {"clear_managed_dict", clear_managed_dict, METH_O, NULL},
     {"is_uniquely_referenced", is_uniquely_referenced, METH_O},
+    {"pyobject_dump", pyobject_dump, METH_VARARGS},
     {NULL},
 };
 
diff --git a/Modules/_testmultiphase.c b/Modules/_testmultiphase.c
index cd2d7b65598..e286eaae820 100644
--- a/Modules/_testmultiphase.c
+++ b/Modules/_testmultiphase.c
@@ -1024,6 +1024,20 @@ PyModExport__test_from_modexport(void)
 {
     static PyModuleDef_Slot slots[] = {
         {Py_mod_name, "_test_from_modexport"},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return slots;
 }
+
+PyMODEXPORT_FUNC
+PyModExport__test_from_modexport_gil_used(void)
+{
+    static PyModuleDef_Slot slots[] = {
+        {Py_mod_name, "_test_from_modexport_gil_used"},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_USED},
+        {0},
+    };
+    return slots;
+}
@@ -1073,6 +1087,21 @@ PyModExport__test_from_modexport_create_nonmodule(void)
     static PyModuleDef_Slot slots[] = {
         {Py_mod_name, "_test_from_modexport_create_nonmodule"},
         {Py_mod_create, modexport_create_string},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+        {0},
+    };
+    return slots;
+}
+
+PyMODEXPORT_FUNC
+PyModExport__test_from_modexport_create_nonmodule_gil_used(void)
+{
+    static PyModuleDef_Slot slots[] = {
+        {Py_mod_name, "_test_from_modexport_create_nonmodule"},
+        {Py_mod_create, modexport_create_string},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_USED},
         {0},
     };
     return slots;
@@ -1165,6 +1194,8 @@ PyModExport__test_from_modexport_smoke(void)
         {Py_mod_methods, methods},
         {Py_mod_state_free, modexport_smoke_free},
         {Py_mod_token, (void*)&modexport_smoke_test_token},
+        {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
+        {Py_mod_gil, Py_MOD_GIL_NOT_USED},
         {0},
     };
     return slots;
diff --git a/Modules/clinic/readline.c.h b/Modules/clinic/readline.c.h
index 696475f7d00..dc9381e4b97 100644
--- a/Modules/clinic/readline.c.h
+++ b/Modules/clinic/readline.c.h
@@ -349,6 +349,28 @@ exit:
 
 #endif /* defined(HAVE_RL_PRE_INPUT_HOOK) */
 
+#if defined(HAVE_RL_PRE_INPUT_HOOK)
+
+PyDoc_STRVAR(readline_get_pre_input_hook__doc__,
+"get_pre_input_hook($module, /)\n"
+"--\n"
+"\n"
+"Get the current pre-input hook function.");
+
+#define READLINE_GET_PRE_INPUT_HOOK_METHODDEF    \
+    {"get_pre_input_hook", (PyCFunction)readline_get_pre_input_hook, METH_NOARGS, readline_get_pre_input_hook__doc__},
+
+static PyObject *
+readline_get_pre_input_hook_impl(PyObject *module);
+
+static PyObject *
+readline_get_pre_input_hook(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    return readline_get_pre_input_hook_impl(module);
+}
+
+#endif /* defined(HAVE_RL_PRE_INPUT_HOOK) */
+
 PyDoc_STRVAR(readline_get_completion_type__doc__,
 "get_completion_type($module, /)\n"
 "--\n"
@@ -794,7 +816,11 @@ readline_redisplay(PyObject *module, PyObject *Py_UNUSED(ignored))
     #define READLINE_SET_PRE_INPUT_HOOK_METHODDEF
 #endif /* !defined(READLINE_SET_PRE_INPUT_HOOK_METHODDEF) */
 
+#ifndef READLINE_GET_PRE_INPUT_HOOK_METHODDEF
+    #define READLINE_GET_PRE_INPUT_HOOK_METHODDEF
+#endif /* !defined(READLINE_GET_PRE_INPUT_HOOK_METHODDEF) */
+
 #ifndef READLINE_CLEAR_HISTORY_METHODDEF
     #define READLINE_CLEAR_HISTORY_METHODDEF
 #endif /* !defined(READLINE_CLEAR_HISTORY_METHODDEF) */
-/*[clinic end generated code: output=88d9812b6caa2102 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=4bd95070973cd0e2 input=a9049054013a1b77]*/
diff --git a/Modules/cmathmodule.c b/Modules/cmathmodule.c
index aee3e4f343d..65fbcf5cdaa 100644
--- a/Modules/cmathmodule.c
+++ b/Modules/cmathmodule.c
@@ -163,8 +163,15 @@ special_type(double d)
    raised.
  */
 
-static Py_complex acos_special_values[7][7];
-
+static Py_complex acos_special_values[7][7] = {
+    { {P34,INF}, {P,INF}, {P,INF}, {P,-INF}, {P,-INF}, {P34,-INF}, {N,INF} },
+    { {P12,INF}, {U,U}, {U,U}, {U,U}, {U,U}, {P12,-INF}, {N,N} },
+    { {P12,INF}, {U,U}, {P12,0.}, {P12,-0.}, {U,U}, {P12,-INF}, {P12,N} },
+    { {P12,INF}, {U,U}, {P12,0.}, {P12,-0.}, {U,U}, {P12,-INF}, {P12,N} },
+    { {P12,INF}, {U,U}, {U,U}, {U,U}, {U,U}, {P12,-INF}, {N,N} },
+    { {P14,INF}, {0.,INF}, {0.,INF}, {0.,-INF}, {0.,-INF}, {P14,-INF}, {N,INF} },
+    { {N,INF}, {N,N}, {N,N}, {N,N}, {N,N}, {N,-INF}, {N,N} }
+};
 
 /*[clinic input]
 cmath.acos -> Py_complex_protected
@@ -202,7 +209,15 @@ cmath_acos_impl(PyObject *module, Py_complex z)
 }
 
 
-static Py_complex acosh_special_values[7][7];
+static Py_complex acosh_special_values[7][7] = {
+    { {INF,-P34}, {INF,-P}, {INF,-P}, {INF,P}, {INF,P}, {INF,P34}, {INF,N} },
+    { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P12}, {U,U}, {0.,-P12}, {0.,P12}, {U,U}, {INF,P12}, {N,P12} },
+    { {INF,-P12}, {U,U}, {0.,-P12}, {0.,P12}, {U,U}, {INF,P12}, {N,P12} },
+    { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P14}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,P14}, {INF,N} },
+    { {INF,N}, {N,N}, {N,N}, {N,N}, {N,N}, {INF,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.acosh = cmath.acos
@@ -257,7 +272,15 @@ cmath_asin_impl(PyObject *module, Py_complex z)
 }
 
 
-static Py_complex asinh_special_values[7][7];
+static Py_complex asinh_special_values[7][7] = {
+    { {-INF,-P14}, {-INF,-0.}, {-INF,-0.}, {-INF,0.}, {-INF,0.}, {-INF,P14}, {-INF,N} },
+    { {-INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {-INF,P12}, {N,N} },
+    { {-INF,-P12}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {-INF,P12}, {N,N} },
+    { {INF,-P12}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P14}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,P14}, {INF,N} },
+    { {INF,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {INF,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.asinh = cmath.acos
@@ -318,7 +341,15 @@ cmath_atan_impl(PyObject *module, Py_complex z)
 }
 
 
-static Py_complex atanh_special_values[7][7];
+static Py_complex atanh_special_values[7][7] = {
+    { {-0.,-P12}, {-0.,-P12}, {-0.,-P12}, {-0.,P12}, {-0.,P12}, {-0.,P12}, {-0.,N} },
+    { {-0.,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {-0.,P12}, {N,N} },
+    { {-0.,-P12}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {-0.,P12}, {-0.,N} },
+    { {0.,-P12}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,P12}, {0.,N} },
+    { {0.,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {0.,P12}, {N,N} },
+    { {0.,-P12}, {0.,-P12}, {0.,-P12}, {0.,P12}, {0.,P12}, {0.,P12}, {0.,N} },
+    { {0.,-P12}, {N,N}, {N,N}, {N,N}, {N,N}, {0.,P12}, {N,N} }
+};
 
 /*[clinic input]
 cmath.atanh = cmath.acos
@@ -391,7 +422,15 @@ cmath_cos_impl(PyObject *module, Py_complex z)
 
 /* cosh(infinity + i*y) needs to be dealt with specially */
-static Py_complex cosh_special_values[7][7];
+static Py_complex cosh_special_values[7][7] = {
+    { {INF,N}, {U,U}, {INF,0.}, {INF,-0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {N,0.}, {U,U}, {1.,0.}, {1.,-0.}, {U,U}, {N,0.}, {N,0.} },
+    { {N,0.}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {N,0.}, {N,0.} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {N,N}, {N,0.}, {N,0.}, {N,N}, {N,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.cosh = cmath.acos
@@ -453,7 +492,15 @@ cmath_cosh_impl(PyObject *module, Py_complex z)
 
 /* exp(infinity + i*y) and exp(-infinity + i*y) need special treatment for
    finite y */
-static Py_complex exp_special_values[7][7];
+static Py_complex exp_special_values[7][7] = {
+    { {0.,0.}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,0.}, {0.,0.} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {N,N}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {N,N}, {N,N} },
+    { {N,N}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {N,N}, {N,N} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {N,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.exp = cmath.acos
@@ -512,7 +559,15 @@ cmath_exp_impl(PyObject *module, Py_complex z)
     return r;
 }
 
-static Py_complex log_special_values[7][7];
+static Py_complex log_special_values[7][7] = {
+    { {INF,-P34}, {INF,-P}, {INF,-P}, {INF,P}, {INF,P}, {INF,P34}, {INF,N} },
+    { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P12}, {U,U}, {-INF,-P}, {-INF,P}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P12}, {U,U}, {-INF,-0.}, {-INF,0.}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P12}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,P12}, {N,N} },
+    { {INF,-P14}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,P14}, {INF,N} },
+    { {INF,N}, {N,N}, {N,N}, {N,N}, {N,N}, {INF,N}, {N,N} }
+};
 
 
 static Py_complex
 c_log(Py_complex z)
@@ -628,7 +683,15 @@ cmath_sin_impl(PyObject *module, Py_complex z)
 
 /* sinh(infinity + i*y) needs to be dealt with specially */
-static Py_complex sinh_special_values[7][7];
+static Py_complex sinh_special_values[7][7] = {
+    { {INF,N}, {U,U}, {-INF,-0.}, {-INF,0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {0.,N}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {0.,N}, {0.,N} },
+    { {0.,N}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,N}, {0.,N} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {N,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.sinh = cmath.acos
@@ -687,7 +750,15 @@ cmath_sinh_impl(PyObject *module, Py_complex z)
 }
 
 
-static Py_complex sqrt_special_values[7][7];
+static Py_complex sqrt_special_values[7][7] = {
+    { {INF,-INF}, {0.,-INF}, {0.,-INF}, {0.,INF}, {0.,INF}, {INF,INF}, {N,INF} },
+    { {INF,-INF}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,INF}, {N,N} },
+    { {INF,-INF}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {INF,INF}, {N,N} },
+    { {INF,-INF}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {INF,INF}, {N,N} },
+    { {INF,-INF}, {U,U}, {U,U}, {U,U}, {U,U}, {INF,INF}, {N,N} },
+    { {INF,-INF}, {INF,-0.}, {INF,-0.}, {INF,0.}, {INF,0.}, {INF,INF}, {INF,N} },
+    { {INF,-INF}, {N,N}, {N,N}, {N,N}, {N,N}, {INF,INF}, {N,N} }
+};
 
 /*[clinic input]
 cmath.sqrt = cmath.acos
@@ -786,7 +857,15 @@ cmath_tan_impl(PyObject *module, Py_complex z)
 
 /* tanh(infinity + i*y) needs to be dealt with specially */
-static Py_complex tanh_special_values[7][7];
+static Py_complex tanh_special_values[7][7] = {
+    { {-1.,0.}, {U,U}, {-1.,-0.}, {-1.,0.}, {U,U}, {-1.,0.}, {-1.,0.} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {-0.0,N}, {U,U}, {-0.,-0.}, {-0.,0.}, {U,U}, {-0.0,N}, {-0.,N} },
+    { {0.0,N}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.0,N}, {0.,N} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {1.,0.}, {U,U}, {1.,-0.}, {1.,0.}, {U,U}, {1.,0.}, {1.,0.} },
+    { {N,N}, {N,N}, {N,-0.}, {N,0.}, {N,N}, {N,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.tanh = cmath.acos
@@ -969,7 +1048,15 @@ cmath_polar_impl(PyObject *module, Py_complex z)
  */
 
-static Py_complex rect_special_values[7][7];
+static Py_complex rect_special_values[7][7] = {
+    { {INF,N}, {U,U}, {-INF,0.}, {-INF,-0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {0.,0.}, {U,U}, {-0.,0.}, {-0.,-0.}, {U,U}, {0.,0.}, {0.,0.} },
+    { {0.,0.}, {U,U}, {0.,-0.}, {0.,0.}, {U,U}, {0.,0.}, {0.,0.} },
+    { {N,N}, {U,U}, {U,U}, {U,U}, {U,U}, {N,N}, {N,N} },
+    { {INF,N}, {U,U}, {INF,-0.}, {INF,0.}, {U,U}, {INF,N}, {INF,N} },
+    { {N,N}, {N,N}, {N,0.}, {N,0.}, {N,N}, {N,N}, {N,N} }
+};
 
 /*[clinic input]
 cmath.rect
@@ -1202,120 +1289,6 @@ cmath_exec(PyObject *mod)
         return -1;
     }
 
-    /* initialize special value tables */
-
-#define INIT_SPECIAL_VALUES(NAME, BODY) { Py_complex* p = (Py_complex*)NAME; BODY }
-#define C(REAL, IMAG) p->real = REAL; p->imag = IMAG; ++p;
-
-    INIT_SPECIAL_VALUES(acos_special_values, {
-      C(P34,INF) C(P,INF) C(P,INF) C(P,-INF) C(P,-INF) C(P34,-INF) C(N,INF)
-      C(P12,INF) C(U,U) C(U,U) C(U,U) C(U,U) C(P12,-INF) C(N,N)
-      C(P12,INF) C(U,U) C(P12,0.) C(P12,-0.) C(U,U) C(P12,-INF) C(P12,N)
-      C(P12,INF) C(U,U) C(P12,0.) C(P12,-0.) C(U,U) C(P12,-INF) C(P12,N)
-      C(P12,INF) C(U,U) C(U,U) C(U,U) C(U,U) C(P12,-INF) C(N,N)
-      C(P14,INF) C(0.,INF) C(0.,INF) C(0.,-INF) C(0.,-INF) C(P14,-INF) C(N,INF)
-      C(N,INF) C(N,N) C(N,N) C(N,N) C(N,N) C(N,-INF) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(acosh_special_values, {
-      C(INF,-P34) C(INF,-P) C(INF,-P) C(INF,P) C(INF,P) C(INF,P34) C(INF,N)
-      C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P12) C(U,U) C(0.,-P12) C(0.,P12) C(U,U) C(INF,P12) C(N,P12)
-      C(INF,-P12) C(U,U) C(0.,-P12) C(0.,P12) C(U,U) C(INF,P12) C(N,P12)
-      C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P14) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,P14) C(INF,N)
-      C(INF,N) C(N,N) C(N,N) C(N,N) C(N,N) C(INF,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(asinh_special_values, {
-      C(-INF,-P14) C(-INF,-0.) C(-INF,-0.) C(-INF,0.) C(-INF,0.) C(-INF,P14) C(-INF,N)
-      C(-INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(-INF,P12) C(N,N)
-      C(-INF,-P12) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(-INF,P12) C(N,N)
-      C(INF,-P12) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P14) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,P14) C(INF,N)
-      C(INF,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(INF,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(atanh_special_values, {
-      C(-0.,-P12) C(-0.,-P12) C(-0.,-P12) C(-0.,P12) C(-0.,P12) C(-0.,P12) C(-0.,N)
-      C(-0.,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(-0.,P12) C(N,N)
-      C(-0.,-P12) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(-0.,P12) C(-0.,N)
-      C(0.,-P12) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,P12) C(0.,N)
-      C(0.,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(0.,P12) C(N,N)
-      C(0.,-P12) C(0.,-P12) C(0.,-P12) C(0.,P12) C(0.,P12) C(0.,P12) C(0.,N)
-      C(0.,-P12) C(N,N) C(N,N) C(N,N) C(N,N) C(0.,P12) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(cosh_special_values, {
-      C(INF,N) C(U,U) C(INF,0.) C(INF,-0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(N,0.) C(U,U) C(1.,0.) C(1.,-0.) C(U,U) C(N,0.) C(N,0.)
-      C(N,0.) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(N,0.) C(N,0.)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(N,N) C(N,0.) C(N,0.) C(N,N) C(N,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(exp_special_values, {
-      C(0.,0.) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,0.) C(0.,0.)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(N,N) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(N,N) C(N,N)
-      C(N,N) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(N,N) C(N,N)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(N,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(log_special_values, {
-      C(INF,-P34) C(INF,-P) C(INF,-P) C(INF,P) C(INF,P) C(INF,P34) C(INF,N)
-      C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P12) C(U,U) C(-INF,-P) C(-INF,P) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P12) C(U,U) C(-INF,-0.) C(-INF,0.) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P12) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,P12) C(N,N)
-      C(INF,-P14) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,P14) C(INF,N)
-      C(INF,N) C(N,N) C(N,N) C(N,N) C(N,N) C(INF,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(sinh_special_values, {
-      C(INF,N) C(U,U) C(-INF,-0.) C(-INF,0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(0.,N) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(0.,N) C(0.,N)
-      C(0.,N) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,N) C(0.,N)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(N,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(sqrt_special_values, {
-      C(INF,-INF) C(0.,-INF) C(0.,-INF) C(0.,INF) C(0.,INF) C(INF,INF) C(N,INF)
-      C(INF,-INF) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,INF) C(N,N)
-      C(INF,-INF) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(INF,INF) C(N,N)
-      C(INF,-INF) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(INF,INF) C(N,N)
-      C(INF,-INF) C(U,U) C(U,U) C(U,U) C(U,U) C(INF,INF) C(N,N)
-      C(INF,-INF) C(INF,-0.) C(INF,-0.) C(INF,0.) C(INF,0.) C(INF,INF) C(INF,N)
-      C(INF,-INF) C(N,N) C(N,N) C(N,N) C(N,N) C(INF,INF) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(tanh_special_values, {
-      C(-1.,0.) C(U,U) C(-1.,-0.) C(-1.,0.) C(U,U) C(-1.,0.) C(-1.,0.)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(-0.0,N) C(U,U) C(-0.,-0.) C(-0.,0.) C(U,U) C(-0.0,N) C(-0.,N)
-      C(0.0,N) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.0,N) C(0.,N)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(1.,0.) C(U,U) C(1.,-0.) C(1.,0.) C(U,U) C(1.,0.) C(1.,0.)
-      C(N,N) C(N,N) C(N,-0.) C(N,0.) C(N,N) C(N,N) C(N,N)
-    })
-
-    INIT_SPECIAL_VALUES(rect_special_values, {
-      C(INF,N) C(U,U) C(-INF,0.) C(-INF,-0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(0.,0.) C(U,U) C(-0.,0.) C(-0.,-0.) C(U,U) C(0.,0.) C(0.,0.)
-      C(0.,0.) C(U,U) C(0.,-0.) C(0.,0.) C(U,U) C(0.,0.) C(0.,0.)
-      C(N,N) C(U,U) C(U,U) C(U,U) C(U,U) C(N,N) C(N,N)
-      C(INF,N) C(U,U) C(INF,-0.) C(INF,0.) C(U,U) C(INF,N) C(INF,N)
-      C(N,N) C(N,N) C(N,0.) C(N,0.) C(N,N) C(N,N) C(N,N)
-    })
 
     return 0;
 }
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 8a8c7284283..4c286f5c12c 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -358,10 +358,12 @@ gc_get_stats_impl(PyObject *module)
     for (i = 0; i < NUM_GENERATIONS; i++) {
         PyObject *dict;
         st = &stats[i];
-        dict = Py_BuildValue("{snsnsn}",
+        dict = Py_BuildValue("{snsnsnsnsd}",
                              "collections", st->collections,
                              "collected", st->collected,
-                             "uncollectable", st->uncollectable
+                             "uncollectable", st->uncollectable,
+                             "candidates", st->candidates,
+                             "duration", st->duration
                             );
         if (dict == NULL)
             goto error;
diff --git a/Modules/readline.c b/Modules/readline.c
index e89755b0cb4..cc84eb6229e 100644
--- a/Modules/readline.c
+++ b/Modules/readline.c
@@ -572,6 +572,26 @@ readline_set_pre_input_hook_impl(PyObject *module, PyObject *function)
     return set_hook("pre_input_hook", &state->pre_input_hook, function);
 }
 
+
+/* Get pre-input hook */
+
+/*[clinic input]
+readline.get_pre_input_hook
+
+Get the current pre-input hook function.
+[clinic start generated code]*/
+
+static PyObject *
+readline_get_pre_input_hook_impl(PyObject *module)
+/*[clinic end generated code: output=ad56b77a8e8981ca input=fb1e1b1fbd94e4e5]*/
+{
+    readlinestate *state = get_readline_state(module);
+    if (state->pre_input_hook == NULL) {
+        Py_RETURN_NONE;
+    }
+    return Py_NewRef(state->pre_input_hook);
+}
+
 #endif
 
@@ -1074,6 +1094,7 @@ static struct PyMethodDef readline_methods[] =
     READLINE_SET_STARTUP_HOOK_METHODDEF
 #ifdef HAVE_RL_PRE_INPUT_HOOK
     READLINE_SET_PRE_INPUT_HOOK_METHODDEF
+    READLINE_GET_PRE_INPUT_HOOK_METHODDEF
 #endif
 #ifdef HAVE_RL_COMPLETION_APPEND_CHARACTER
     READLINE_CLEAR_HISTORY_METHODDEF
diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c
index 6a844d44bf0..1ef359cb265 100644
--- a/Modules/socketmodule.c
+++ b/Modules/socketmodule.c
@@ -8901,6 +8901,9 @@ socket_exec(PyObject *m)
 #ifdef IPV6_HOPLIMIT
     ADD_INT_MACRO(m, IPV6_HOPLIMIT);
 #endif
+#ifdef IPV6_HDRINCL
+    ADD_INT_MACRO(m, IPV6_HDRINCL);
+#endif
 #ifdef IPV6_HOPOPTS
     ADD_INT_MACRO(m, IPV6_HOPOPTS);
 #endif
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 99bfdec89f6..99e1c9b13f7 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -1547,8 +1547,20 @@ bytearray_take_bytes_impl(PyByteArrayObject *self, PyObject *n)
         return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
     }
 
-    // Copy remaining bytes to a new bytes.
     Py_ssize_t remaining_length = size - to_take;
+    // Optimization: if we take fewer bytes than we leave behind, copy out
+    // the small to_take portion and advance ob_start instead.
+    if (to_take < remaining_length) {
+        PyObject *ret = PyBytes_FromStringAndSize(self->ob_start, to_take);
+        if (ret == NULL) {
+            return NULL;
+        }
+        self->ob_start += to_take;
+        Py_SET_SIZE(self, remaining_length);
+        return ret;
+    }
+
+    // Copy remaining bytes to a new bytes.
     PyObject *remaining = PyBytes_FromStringAndSize(self->ob_start + to_take,
                                                     remaining_length);
     if (remaining == NULL) {
diff --git a/Objects/call.c b/Objects/call.c
index bd8617825b5..c69015abfb3 100644
--- a/Objects/call.c
+++ b/Objects/call.c
@@ -891,39 +891,6 @@ PyObject_CallMethodObjArgs(PyObject *obj, PyObject *name, ...)
 }
 
 
-PyObject *
-_PyObject_CallMethodIdObjArgs(PyObject *obj, _Py_Identifier *name, ...)
-{
-    PyThreadState *tstate = _PyThreadState_GET();
-    if (obj == NULL || name == NULL) {
-        return null_error(tstate);
-    }
-
-    PyObject *oname = _PyUnicode_FromId(name); /* borrowed */
-    if (!oname) {
-        return NULL;
-    }
-    _PyCStackRef method;
-    _PyThreadState_PushCStackRef(tstate, &method);
-    int is_method = _PyObject_GetMethodStackRef(tstate, obj, oname, &method.ref);
-    if (PyStackRef_IsNull(method.ref)) {
-        _PyThreadState_PopCStackRef(tstate, &method);
-        return NULL;
-    }
-    PyObject *callable = PyStackRef_AsPyObjectBorrow(method.ref);
-
-    obj = is_method ? obj : NULL;
-
-    va_list vargs;
-    va_start(vargs, name);
-    PyObject *result = object_vacall(tstate, obj, callable, vargs);
-    va_end(vargs);
-
-    _PyThreadState_PopCStackRef(tstate, &method);
-    return result;
-}
-
-
 PyObject *
 PyObject_CallFunctionObjArgs(PyObject *callable, ...)
 {
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 14de21f3c67..e0eef7b46df 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1914,10 +1914,14 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
     if (old_value != value) {
         _PyDict_NotifyEvent(interp, PyDict_EVENT_MODIFIED, mp, key, value);
         assert(old_value != NULL);
-        assert(!_PyDict_HasSplitTable(mp));
         if (DK_IS_UNICODE(mp->ma_keys)) {
-            PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix];
-            STORE_VALUE(ep, value);
+            if (_PyDict_HasSplitTable(mp)) {
+                STORE_SPLIT_VALUE(mp, ix, value);
+            }
+            else {
+                PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix];
+                STORE_VALUE(ep, value);
+            }
         }
         else {
             PyDictKeyEntry *ep = &DK_ENTRIES(mp->ma_keys)[ix];
@@ -2527,18 +2531,6 @@ _PyDict_GetItemWithError(PyObject *dp, PyObject *kv)
     return _PyDict_GetItem_KnownHash(dp, kv, hash);  // borrowed reference
 }
 
-PyObject *
-_PyDict_GetItemIdWithError(PyObject *dp, _Py_Identifier *key)
-{
-    PyObject *kv;
-    kv = _PyUnicode_FromId(key); /* borrowed */
-    if (kv == NULL)
-        return NULL;
-    Py_hash_t hash = unicode_get_hash(kv);
-    assert (hash != -1);  /* interned strings have their hash value initialised */
-    return _PyDict_GetItem_KnownHash(dp, kv, hash);  // borrowed reference
-}
-
 PyObject *
 _PyDict_GetItemStringWithError(PyObject *v, const char *key)
 {
@@ -4845,16 +4837,6 @@ _PyDict_Contains_KnownHash(PyObject *op, PyObject *key, Py_hash_t hash)
     return 0;
 }
 
-int
-_PyDict_ContainsId(PyObject *op, _Py_Identifier *key)
-{
-    PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
-    if (kv == NULL) {
-        return -1;
-    }
-    return PyDict_Contains(op, kv);
-}
-
 /* Hack to implement "key in dict" */
 static PySequenceMethods dict_as_sequence = {
     0,                          /* sq_length */
@@ -5035,16 +5017,6 @@ PyDict_GetItemStringRef(PyObject *v, const char *key, PyObject **result)
     return res;
 }
 
-int
-_PyDict_SetItemId(PyObject *v, _Py_Identifier *key, PyObject *item)
-{
-    PyObject *kv;
-    kv = _PyUnicode_FromId(key); /* borrowed */
-    if (kv == NULL)
-        return -1;
-    return PyDict_SetItem(v, kv, item);
-}
-
 int
 PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
 {
@@ -5060,15 +5032,6 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item)
     return err;
 }
 
-int
-_PyDict_DelItemId(PyObject *v, _Py_Identifier *key)
-{
-    PyObject *kv = _PyUnicode_FromId(key); /* borrowed */
-    if (kv == NULL)
-        return -1;
-    return PyDict_DelItem(v, kv);
-}
-
 int
 PyDict_DelItemString(PyObject *v, const char *key)
 {
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 244d8f39e2b..9a43057b383 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -694,12 +694,12 @@ PyTypeObject _PyExc_ ## EXCNAME = { \
 
 #define ComplexExtendsException(EXCBASE, EXCNAME, EXCSTORE, EXCNEW, \
                                 EXCMETHODS, EXCMEMBERS, EXCGETSET, \
-                                EXCSTR, EXCDOC) \
+                                EXCSTR, EXCREPR, EXCDOC) \
 static PyTypeObject _PyExc_ ## EXCNAME = { \
     PyVarObject_HEAD_INIT(NULL, 0) \
     # EXCNAME, \
     sizeof(Py ## EXCSTORE ## Object), 0, \
-    EXCSTORE ## _dealloc, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+    EXCSTORE ## _dealloc, 0, 0, 0, 0, EXCREPR, 0, 0, 0, 0, 0, \
     EXCSTR, 0, 0, 0, \
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, \
     PyDoc_STR(EXCDOC), EXCSTORE ## _traverse, \
@@ -792,7 +792,7 @@ StopIteration_traverse(PyObject *op, visitproc visit, void *arg)
 }
 
 ComplexExtendsException(PyExc_Exception, StopIteration, StopIteration,
-                        0, 0, StopIteration_members, 0, 0,
+                        0, 0, StopIteration_members, 0, 0, 0,
                         "Signal the end from iterator.__next__().");
 
@@ -865,7 +865,7 @@ static PyMemberDef SystemExit_members[] = {
 };
 
 ComplexExtendsException(PyExc_BaseException, SystemExit, SystemExit,
-                        0, 0, SystemExit_members, 0, 0,
+                        0, 0, SystemExit_members, 0, 0, 0,
                         "Request to exit from the interpreter.");
 
 /*
@@ -890,6 +890,7 @@ BaseExceptionGroup_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 
     PyObject *message = NULL;
     PyObject *exceptions = NULL;
+    PyObject *exceptions_str = NULL;
 
     if (!PyArg_ParseTuple(args,
                           "UO:BaseExceptionGroup.__new__",
@@ -905,6 +906,18 @@ BaseExceptionGroup_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         return NULL;
     }
 
+    /* Save the initial exceptions sequence as a string, in case the sequence is mutated */
+    if (!PyList_Check(exceptions) && !PyTuple_Check(exceptions)) {
+        exceptions_str = PyObject_Repr(exceptions);
+        if (exceptions_str == NULL) {
+            /* We don't hold a reference to exceptions, so clear it before
+             * attempting a decref in the cleanup.
+             */
+            exceptions = NULL;
+            goto error;
+        }
+    }
+
     exceptions = PySequence_Tuple(exceptions);
     if (!exceptions) {
         return NULL;
@@ -988,9 +1001,11 @@ BaseExceptionGroup_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 
     self->msg = Py_NewRef(message);
     self->excs = exceptions;
+    self->excs_str = exceptions_str;
     return (PyObject*)self;
 error:
-    Py_DECREF(exceptions);
+    Py_XDECREF(exceptions);
+    Py_XDECREF(exceptions_str);
     return NULL;
 }
 
@@ -1029,6 +1044,7 @@ BaseExceptionGroup_clear(PyObject *op)
     PyBaseExceptionGroupObject *self = PyBaseExceptionGroupObject_CAST(op);
     Py_CLEAR(self->msg);
     Py_CLEAR(self->excs);
+    Py_CLEAR(self->excs_str);
     return BaseException_clear(op);
 }
 
@@ -1046,6 +1062,7 @@ BaseExceptionGroup_traverse(PyObject *op, visitproc visit, void *arg)
     PyBaseExceptionGroupObject *self = PyBaseExceptionGroupObject_CAST(op);
     Py_VISIT(self->msg);
     Py_VISIT(self->excs);
+    Py_VISIT(self->excs_str);
     return BaseException_traverse(op, visit, arg);
 }
 
@@ -1063,6 +1080,60 @@ BaseExceptionGroup_str(PyObject *op)
         self->msg, num_excs, num_excs > 1 ? "s" : "");
 }
 
+static PyObject *
+BaseExceptionGroup_repr(PyObject *op)
+{
+    PyBaseExceptionGroupObject *self = PyBaseExceptionGroupObject_CAST(op);
+    assert(self->msg);
+
+    PyObject *exceptions_str = NULL;
+
+    /* Use the saved exceptions string for custom sequences. */
+    if (self->excs_str) {
+        exceptions_str = Py_NewRef(self->excs_str);
+    }
+    else {
+        assert(self->excs);
+
+        /* Older versions delegated to BaseException, inserting the current
+         * value of self.args[1]; but this can be mutable and go out-of-sync
+         * with self.exceptions. Instead, use self.exceptions for accuracy,
+         * making it look like self.args[1] for backwards compatibility.
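+         *
+         * For example (an illustrative case derived from the code below):
+         * after
+         *     eg = ExceptionGroup('eg', [ValueError(1)])
+         *     eg.args[1].clear()
+         * repr(eg) is still "ExceptionGroup('eg', [ValueError(1)])", because
+         * it is rebuilt from self->excs rather than read from the mutated
+         * args[1] list.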
+         */
+        if (PyList_Check(PyTuple_GET_ITEM(self->args, 1))) {
+            PyObject *exceptions_list = PySequence_List(self->excs);
+            if (!exceptions_list) {
+                return NULL;
+            }
+
+            exceptions_str = PyObject_Repr(exceptions_list);
+            Py_DECREF(exceptions_list);
+        }
+        else {
+            exceptions_str = PyObject_Repr(self->excs);
+        }
+
+        if (!exceptions_str) {
+            return NULL;
+        }
+    }
+
+    assert(exceptions_str != NULL);
+
+    const char *name = _PyType_Name(Py_TYPE(self));
+    PyObject *repr = PyUnicode_FromFormat(
+        "%s(%R, %U)", name,
+        self->msg, exceptions_str);
+
+    Py_DECREF(exceptions_str);
+    return repr;
+}
+
 /*[clinic input]
 @critical_section
 BaseExceptionGroup.derive
@@ -1697,7 +1762,7 @@ static PyMethodDef BaseExceptionGroup_methods[] = {
 ComplexExtendsException(PyExc_BaseException, BaseExceptionGroup,
     BaseExceptionGroup, BaseExceptionGroup_new /* new */,
     BaseExceptionGroup_methods, BaseExceptionGroup_members,
-    0 /* getset */, BaseExceptionGroup_str,
+    0 /* getset */, BaseExceptionGroup_str, BaseExceptionGroup_repr,
     "A combination of multiple unrelated exceptions.");
 
 /*
@@ -2425,7 +2490,7 @@ static PyGetSetDef OSError_getset[] = {
 ComplexExtendsException(PyExc_Exception, OSError,
                         OSError, OSError_new,
                         OSError_methods, OSError_members, OSError_getset,
-                        OSError_str,
+                        OSError_str, 0,
                         "Base class for I/O related errors.");
 
@@ -2566,7 +2631,7 @@ static PyMethodDef NameError_methods[] = {
 
 ComplexExtendsException(PyExc_Exception, NameError,
                         NameError, 0,
                         NameError_methods, NameError_members,
-                        0, BaseException_str, "Name not found globally.");
+                        0, BaseException_str, 0, "Name not found globally.");
 
 /*
  * UnboundLocalError extends NameError
@@ -2700,7 +2765,7 @@ static PyMethodDef AttributeError_methods[] = {
 
 ComplexExtendsException(PyExc_Exception, AttributeError,
                         AttributeError, 0,
                         AttributeError_methods, AttributeError_members,
-                        0, BaseException_str, "Attribute not found.");
+                        0, BaseException_str, 0, "Attribute not found.");
 
 /*
  * SyntaxError extends Exception
@@ -2899,7 +2964,7 @@ static PyMemberDef SyntaxError_members[] = {
 
 ComplexExtendsException(PyExc_Exception, SyntaxError, SyntaxError,
                         0, 0, SyntaxError_members, 0,
-                        SyntaxError_str, "Invalid syntax.");
+                        SyntaxError_str, 0, "Invalid syntax.");
 
 /*
@@ -2959,7 +3024,7 @@ KeyError_str(PyObject *op)
 }
 
 ComplexExtendsException(PyExc_LookupError, KeyError, BaseException,
-                        0, 0, 0, 0, KeyError_str, "Mapping key not found.");
+                        0, 0, 0, 0, KeyError_str, 0, "Mapping key not found.");
 
 /*
diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c
index 6c1c5f5eb89..5a0b16ba572 100644
--- a/Objects/moduleobject.c
+++ b/Objects/moduleobject.c
@@ -2,6 +2,7 @@
 
 #include "Python.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
+#include "pycore_ceval.h"         // _PyEval_EnableGILTransient()
 #include "pycore_dict.h"          // _PyDict_EnablePerThreadRefcounting()
 #include "pycore_fileutils.h"     // _Py_wgetcwd
 #include "pycore_import.h"        // _PyImport_GetNextModuleIndex()
@@ -522,6 +523,28 @@ module_from_def_and_spec(
 #undef COPY_COMMON_SLOT
     }
 
+#ifdef Py_GIL_DISABLED
+    // For modules created directly from slots (not from a def), we enable
+    // the GIL here (pairing `_PyEval_EnableGILTransient` with
+    // an immediate `_PyImport_EnableGILAndWarn`).
+    // For modules created from a def, the caller is responsible for this.
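+    // For example, a hypothetical extension whose slots contain only
+    //     {Py_mod_name, "mymod"},
+    //     {Py_mod_gil, Py_MOD_GIL_USED},
+    //     {0, NULL},
+    // would be expected to take this branch on free-threaded builds: the
+    // GIL is re-enabled and a RuntimeWarning is issued before the module
+    // is initialized.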
+    if (!original_def && requires_gil) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (_PyEval_EnableGILTransient(tstate) < 0) {
+            goto error;
+        }
+        if (_PyImport_EnableGILAndWarn(tstate, nameobj) < 0) {
+            goto error;
+        }
+    }
+#endif
+
     /* By default, multi-phase init modules are expected
        to work under multiple interpreters. */
     if (!has_multiple_interpreters_slot) {
diff --git a/Objects/object.c b/Objects/object.c
index 0540112d7d2..fcea3503de8 100644
--- a/Objects/object.c
+++ b/Objects/object.c
@@ -713,7 +713,7 @@ _PyObject_IsFreed(PyObject *op)
 
 /* For debugging convenience.  See Misc/gdbinit for some useful gdb hooks */
 void
-_PyObject_Dump(PyObject* op)
+PyUnstable_Object_Dump(PyObject* op)
 {
     if (_PyObject_IsFreed(op)) {
         /* It seems like the object memory has been freed:
@@ -3150,7 +3150,7 @@ _PyObject_AssertFailed(PyObject *obj, const char *expr, const char *msg,
 
         /* This might succeed or fail, but we're about to abort, so at least
            try to provide any extra info we can: */
-        _PyObject_Dump(obj);
+        PyUnstable_Object_Dump(obj);
 
         fprintf(stderr, "\n");
         fflush(stderr);
@@ -3361,24 +3361,6 @@ Py_GetConstantBorrowed(unsigned int constant_id)
     return Py_GetConstant(constant_id);
 }
 
-
-// Py_TYPE() implementation for the stable ABI
-#undef Py_TYPE
-PyTypeObject*
-Py_TYPE(PyObject *ob)
-{
-    return _Py_TYPE(ob);
-}
-
-
-// Py_REFCNT() implementation for the stable ABI
-#undef Py_REFCNT
-Py_ssize_t
-Py_REFCNT(PyObject *ob)
-{
-    return _Py_REFCNT(ob);
-}
-
 int
 PyUnstable_IsImmortal(PyObject *op)
 {
@@ -3405,3 +3387,16 @@ _PyObject_VisitType(PyObject *op, visitproc visit, void *arg)
     Py_VISIT(tp);
     return 0;
 }
+
+// Implementations for the stable ABI
+// Keep these at the end.
+#undef Py_TYPE
+#undef Py_REFCNT
+#undef Py_SIZE
+#undef Py_IS_TYPE
+#undef Py_SET_SIZE
+PyTypeObject* Py_TYPE(PyObject *ob) { return _Py_TYPE_impl(ob); }
+Py_ssize_t Py_REFCNT(PyObject *ob) { return _Py_REFCNT(ob); }
+Py_ssize_t Py_SIZE(PyObject *o) { return _Py_SIZE_impl(o); }
+int Py_IS_TYPE(PyObject *o, PyTypeObject *t) { return _Py_IS_TYPE_impl(o, t); }
+void Py_SET_SIZE(PyVarObject *o, Py_ssize_t s) { _Py_SET_SIZE_impl(o, s); }
diff --git a/Objects/odictobject.c b/Objects/odictobject.c
index 45d2ea0203a..25928028919 100644
--- a/Objects/odictobject.c
+++ b/Objects/odictobject.c
@@ -223,7 +223,6 @@
 PyDict_DelItem                  PyMapping_DelItem
 PyDict_DelItemString            PyMapping_DelItemString
 PyDict_GetItem                  -
 PyDict_GetItemWithError         PyObject_GetItem
-_PyDict_GetItemIdWithError      -
 PyDict_GetItemString            PyMapping_GetItemString
 PyDict_Items                    PyMapping_Items
 PyDict_Keys                     PyMapping_Keys
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 61bcc21ce13..cbe0215359e 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -81,7 +81,7 @@ class object "PyObject *" "&PyBaseObject_Type"
 #define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2()
 
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(Py_DEBUG)
 
 // Return true if the world is currently stopped.
 static bool
 types_world_is_stopped(void)
@@ -4343,14 +4343,6 @@ type_new_slots_bases(type_new_ctx *ctx)
 static int
 type_new_slots_impl(type_new_ctx *ctx, PyObject *dict)
 {
-    /* Are slots allowed? */
-    if (ctx->nslot > 0 && ctx->base->tp_itemsize != 0) {
-        PyErr_Format(PyExc_TypeError,
-                     "nonempty __slots__ not supported for subtype of '%s'",
-                     ctx->base->tp_name);
-        return -1;
-    }
-
     if (type_new_visit_slots(ctx) < 0) {
         return -1;
     }
@@ -4377,14 +4369,13 @@ type_new_slots(type_new_ctx *ctx, PyObject *dict)
     ctx->add_dict = 0;
     ctx->add_weak = 0;
     ctx->may_add_dict = (ctx->base->tp_dictoffset == 0);
-    ctx->may_add_weak = (ctx->base->tp_weaklistoffset == 0
-                         && ctx->base->tp_itemsize == 0);
+    ctx->may_add_weak = (ctx->base->tp_weaklistoffset == 0);
 
     if (ctx->slots == NULL) {
         if (ctx->may_add_dict) {
             ctx->add_dict++;
         }
-        if (ctx->may_add_weak) {
+        if (ctx->may_add_weak && ctx->base->tp_itemsize == 0) {
            ctx->add_weak++;
         }
     }
@@ -4650,6 +4641,12 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type, PyObject *dict
     if (et->ht_slots != NULL) {
         PyMemberDef *mp = _PyHeapType_GET_MEMBERS(et);
         Py_ssize_t nslot = PyTuple_GET_SIZE(et->ht_slots);
+        if (ctx->base->tp_itemsize != 0) {
+            PyErr_Format(PyExc_TypeError,
+                         "arbitrary __slots__ not supported for subtype of '%s'",
+                         ctx->base->tp_name);
+            return -1;
+        }
         for (Py_ssize_t i = 0; i < nslot; i++, mp++) {
             mp->name = PyUnicode_AsUTF8(
                 PyTuple_GET_ITEM(et->ht_slots, i));
@@ -4889,8 +4886,14 @@ type_new_init(type_new_ctx *ctx)
     set_tp_dict(type, dict);
 
     PyHeapTypeObject *et = (PyHeapTypeObject*)type;
-    et->ht_slots = ctx->slots;
-    ctx->slots = NULL;
+    if (ctx->slots && PyTuple_GET_SIZE(ctx->slots)) {
+        et->ht_slots = ctx->slots;
+        ctx->slots = NULL;
+    }
+    else {
+        et->ht_slots = NULL;
+        Py_CLEAR(ctx->slots);
+    }
 
     return type;
 
@@ -6546,6 +6549,18 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
     assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_INLINE_VALUES));
     assert(!_PyType_HasFeature(metatype, Py_TPFLAGS_MANAGED_DICT));
 
+#ifdef Py_GIL_DISABLED
+    // gh-139103: Enable deferred refcounting for functions assigned
+    // to type objects. This is important for `dataclass.__init__`,
+    // which is generated dynamically.
+    if (value != NULL &&
+        PyFunction_Check(value) &&
+        !_PyObject_HasDeferredRefcount(value))
+    {
+        PyUnstable_Object_EnableDeferredRefcount(value);
+    }
+#endif
+
     PyObject *old_value = NULL;
     PyObject *descr = _PyType_LookupRef(metatype, name);
     if (descr != NULL) {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 4e8c132327b..f737a885f19 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -547,7 +547,8 @@ unicode_check_encoding_errors(const char *encoding, const char *errors)
 }
 
 /* Disable checks during Python finalization. For example, it allows to
-   call _PyObject_Dump() during finalization for debugging purpose. */
+ * call PyUnstable_Object_Dump() during finalization for debugging purposes.
+ */
 if (_PyInterpreterState_GetFinalizing(interp) != NULL) {
     return 0;
 }
@@ -11193,47 +11194,6 @@ _PyUnicode_EqualToASCIIString(PyObject *unicode, const char *str)
            memcmp(PyUnicode_1BYTE_DATA(unicode), str, len) == 0;
 }
 
-int
-_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
-{
-    PyObject *right_uni;
-
-    assert(_PyUnicode_CHECK(left));
-    assert(right->string);
-#ifndef NDEBUG
-    for (const char *p = right->string; *p; p++) {
-        assert((unsigned char)*p < 128);
-    }
-#endif
-
-    if (!PyUnicode_IS_ASCII(left))
-        return 0;
-
-    right_uni = _PyUnicode_FromId(right);       /* borrowed */
-    if (right_uni == NULL) {
-        /* memory error or bad data */
-        PyErr_Clear();
-        return _PyUnicode_EqualToASCIIString(left, right->string);
-    }
-
-    if (left == right_uni)
-        return 1;
-
-    assert(PyUnicode_CHECK_INTERNED(right_uni));
-    if (PyUnicode_CHECK_INTERNED(left)) {
-        return 0;
-    }
-
-    Py_hash_t right_hash = PyUnicode_HASH(right_uni);
-    assert(right_hash != -1);
-    Py_hash_t hash = PyUnicode_HASH(left);
-    if (hash != -1 && hash != right_hash) {
-        return 0;
-    }
-
-    return unicode_eq(left, right_uni);
-}
-
 PyObject *
 PyUnicode_RichCompare(PyObject *left, PyObject *right, int op)
 {
diff --git a/PC/python3dll.c b/PC/python3dll.c
index 99e0f05fe03..0d9e7e9a1ba 100755
--- a/PC/python3dll.c
+++ b/PC/python3dll.c
@@ -71,6 +71,7 @@ EXPORT_FUNC(Py_IncRef)
 EXPORT_FUNC(Py_Initialize)
 EXPORT_FUNC(Py_InitializeEx)
 EXPORT_FUNC(Py_Is)
+EXPORT_FUNC(Py_IS_TYPE)
 EXPORT_FUNC(Py_IsFalse)
 EXPORT_FUNC(Py_IsFinalizing)
 EXPORT_FUNC(Py_IsInitialized)
@@ -86,10 +87,12 @@ EXPORT_FUNC(Py_PACK_VERSION)
 EXPORT_FUNC(Py_REFCNT)
 EXPORT_FUNC(Py_ReprEnter)
 EXPORT_FUNC(Py_ReprLeave)
+EXPORT_FUNC(Py_SET_SIZE)
 EXPORT_FUNC(Py_SetPath)
 EXPORT_FUNC(Py_SetProgramName)
 EXPORT_FUNC(Py_SetPythonHome)
 EXPORT_FUNC(Py_SetRecursionLimit)
+EXPORT_FUNC(Py_SIZE)
 EXPORT_FUNC(Py_TYPE)
 EXPORT_FUNC(Py_VaBuildValue)
 EXPORT_FUNC(Py_XNewRef)
@@ -191,6 +194,7 @@ EXPORT_FUNC(PyDict_Merge)
 EXPORT_FUNC(PyDict_MergeFromSeq2)
 EXPORT_FUNC(PyDict_New)
 EXPORT_FUNC(PyDict_Next)
+EXPORT_FUNC(PyDict_SetDefaultRef)
 EXPORT_FUNC(PyDict_SetItem)
 EXPORT_FUNC(PyDict_SetItemString)
 EXPORT_FUNC(PyDict_Size)
diff --git a/PCbuild/_remote_debugging.vcxproj b/PCbuild/_remote_debugging.vcxproj
index a01905fdf2f..c91c9cf3652 100644
--- a/PCbuild/_remote_debugging.vcxproj
+++ b/PCbuild/_remote_debugging.vcxproj
@@ -98,7 +98,16 @@
-
+
+
+
+
+
+
+
+
+
+
diff --git a/PCbuild/_remote_debugging.vcxproj.filters b/PCbuild/_remote_debugging.vcxproj.filters
index ce4437f74e0..b37a2c5575c 100644
--- a/PCbuild/_remote_debugging.vcxproj.filters
+++ b/PCbuild/_remote_debugging.vcxproj.filters
@@ -4,12 +4,40 @@
     {6d101329-41df-49a0-8639-f35408ad7c6d}
+
+    {93995380-89BD-4b04-88EB-625FBE52EBFB}
+    {711941d1-269c-49cb-a733-759b2b91fc61}
-
+
+      Source Files
+
+
+      Source Files
+
+
+      Source Files
+
+
+      Source Files
+
+
+      Source Files
+
+
+      Source Files
+
+
+      Source Files
+
+
+
+
+      Header Files
+
diff --git a/PCbuild/build.bat b/PCbuild/build.bat
index 60235704886..e4de9a80d76 100644
--- a/PCbuild/build.bat
+++ b/PCbuild/build.bat
@@ -41,6 +41,7 @@
 echo. --experimental-jit-off Ditto but off by default (PYTHON_JIT=1 enables it)
 echo. --experimental-jit-interpreter Enable the experimental Tier 2 interpreter.
 echo. --pystats Enable PyStats collection.
 echo. --tail-call-interp Enable tail-calling interpreter (requires LLVM 19 or higher).
+echo. --enable-stackref-debug Enable stackref debugging mode.
 echo.
 echo.Available flags to avoid building certain modules.
 echo.These flags have no effect if '-e' is not given:
@@ -98,6 +99,7 @@ if "%~1"=="--experimental-jit-interpreter-off" (set UseTIER2=6) & shift & goto CheckOpts
 if "%~1"=="--without-remote-debug" (set DisableRemoteDebug=true) & shift & goto CheckOpts
 if "%~1"=="--pystats" (set PyStats=1) & shift & goto CheckOpts
 if "%~1"=="--tail-call-interp" (set UseTailCallInterp=true) & shift & goto CheckOpts
+if "%~1"=="--enable-stackref-debug" (set StackRefDebug=true) & shift & goto CheckOpts
 rem These use the actual property names used by MSBuild.  We could just let
 rem them in through the environment, but we specify them on the command line
 rem anyway for visibility so set defaults after this
@@ -202,6 +204,7 @@ echo on
     /p:PyStats=%PyStats%^
     /p:UseTailCallInterp=%UseTailCallInterp%^
     /p:DisableRemoteDebug=%DisableRemoteDebug%^
+    /p:StackRefDebug=%StackRefDebug%^
     %1 %2 %3 %4 %5 %6 %7 %8 %9
 
 @echo off
diff --git a/PCbuild/pyproject.props b/PCbuild/pyproject.props
index cf35e705f35..53bfe5e3ea9 100644
--- a/PCbuild/pyproject.props
+++ b/PCbuild/pyproject.props
@@ -13,6 +13,7 @@
     $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen\
     $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\zlib-ng\
     $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration)
+    $(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_PGInstrument
     $(ProjectName)
     $(TargetName)$(PyDebugExt)
     false
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 359a47fbfc4..85363949c23 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -110,6 +110,7 @@
     _Py_TAIL_CALL_INTERP=1;%(PreprocessorDefinitions)
     HAVE_COMPUTED_GOTOS;%(PreprocessorDefinitions)
     Py_REMOTE_DEBUG;%(PreprocessorDefinitions)
+    Py_STACKREF_DEBUG;%(PreprocessorDefinitions)
     version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies)
diff --git a/Parser/parser.c b/Parser/parser.c
index 53417fb2b72..648b3702d8f 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -1,5 +1,6 @@
 // @generated by pegen from python.gram
 #include "pegen.h"
+#include "pycore_ceval.h"
 
 #if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
 #  define D(x) if (p->debug) { x; }
diff --git a/Programs/_testembed.c b/Programs/_testembed.c
index d0d7d5f03fb..c6a18249e3c 100644
--- a/Programs/_testembed.c
+++ b/Programs/_testembed.c
@@ -2323,6 +2323,177 @@ test_create_module_from_initfunc(void)
     return Py_RunMain();
 }
 
+/// Multi-phase initialization package & submodule ///
+
+int
+mp_pkg_exec(PyObject *mod)
+{
+    // Make this a namespace package: an empty __path__ list marks
+    // the module as a package.
+    if (PyModule_Add(mod, "__path__", PyList_New(0)) < 0) {
+        return -1;
+    }
+    if (PyModule_AddStringConstant(mod, "mp_pkg_exec_slot_ran", "yes") < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+static PyModuleDef_Slot mp_pkg_slots[] = {
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    {Py_mod_exec, mp_pkg_exec},
+    {0, NULL}
+};
+
+static struct PyModuleDef mp_pkg_def = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "mp_pkg",
+    .m_size = 0,
+    .m_slots = mp_pkg_slots,
+};
+
+PyMODINIT_FUNC
+PyInit_mp_pkg(void)
+{
+    return PyModuleDef_Init(&mp_pkg_def);
+}
+
+static PyObject *
+submod_greet(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    return PyUnicode_FromString("Hello from sub-module");
+}
+
+static PyMethodDef submod_methods[] = {
+    {"greet", submod_greet, METH_NOARGS, NULL},
+    {NULL},
+};
+
+int
+mp_submod_exec(PyObject *mod)
+{
+    return PyModule_AddStringConstant(mod, "mp_submod_exec_slot_ran", "yes");
+}
+
+static PyModuleDef_Slot mp_submod_slots[] = {
+    {Py_mod_gil, Py_MOD_GIL_NOT_USED},
+    {Py_mod_exec, mp_submod_exec},
+    {0, NULL}
+};
+
+static struct PyModuleDef mp_submod_def = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "mp_pkg.mp_submod",
+    .m_size = 0,
+    .m_methods = submod_methods,
+    .m_slots = mp_submod_slots,
+};
+
+PyMODINIT_FUNC
+PyInit_mp_submod(void)
+{
+    return PyModuleDef_Init(&mp_submod_def);
+}
+
+static int
+test_inittab_submodule_multiphase(void)
+{
+    wchar_t* argv[] = {
+        PROGRAM_NAME,
+        L"-c",
+        L"import sys;"
+        L"import mp_pkg.mp_submod;"
+        L"print(mp_pkg.mp_submod);"
+        L"print(sys.modules['mp_pkg.mp_submod']);"
+        L"print(mp_pkg.mp_submod.greet());"
+        L"print(f'{mp_pkg.mp_submod.mp_submod_exec_slot_ran=}');"
+        L"print(f'{mp_pkg.mp_pkg_exec_slot_ran=}');"
+    };
+    PyConfig config;
+    if (PyImport_AppendInittab("mp_pkg",
+                               &PyInit_mp_pkg) != 0) {
+        fprintf(stderr, "PyImport_AppendInittab() failed\n");
+        return 1;
+    }
+    if (PyImport_AppendInittab("mp_pkg.mp_submod",
+                               &PyInit_mp_submod) != 0) {
+        fprintf(stderr, "PyImport_AppendInittab() failed\n");
+        return 1;
+    }
+    PyConfig_InitPythonConfig(&config);
+    config.isolated = 1;
+    config_set_argv(&config, Py_ARRAY_LENGTH(argv), argv);
+    init_from_config_clear(&config);
+    return Py_RunMain();
+}
+
+/// Single-phase initialization package & submodule ///
+
+static struct PyModuleDef sp_pkg_def = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "sp_pkg",
+    .m_size = 0,
+};
+
+PyMODINIT_FUNC
+PyInit_sp_pkg(void)
+{
+    PyObject *mod = PyModule_Create(&sp_pkg_def);
+    if (mod == NULL) {
+        return NULL;
+    }
+    // Make this a namespace package: an empty __path__ list marks
+    // the module as a package.
+    if (PyModule_Add(mod, "__path__", PyList_New(0)) < 0) {
+        Py_DECREF(mod);
+        return NULL;
+    }
+    return mod;
+}
+
+static struct PyModuleDef sp_submod_def = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "sp_pkg.sp_submod",
+    .m_size = 0,
+    .m_methods = submod_methods,
+};
+
+PyMODINIT_FUNC
+PyInit_sp_submod(void)
+{
+    return PyModule_Create(&sp_submod_def);
+}
+
+static int
+test_inittab_submodule_singlephase(void)
+{
+    wchar_t* argv[] = {
+        PROGRAM_NAME,
+        L"-c",
+        L"import sys;"
+        L"import sp_pkg.sp_submod;"
+        L"print(sp_pkg.sp_submod);"
+        L"print(sys.modules['sp_pkg.sp_submod']);"
+        L"print(sp_pkg.sp_submod.greet());"
+    };
+    PyConfig config;
+    if (PyImport_AppendInittab("sp_pkg",
+                               &PyInit_sp_pkg) != 0) {
+        fprintf(stderr, "PyImport_AppendInittab() failed\n");
+        return 1;
+    }
+    if (PyImport_AppendInittab("sp_pkg.sp_submod",
+                               &PyInit_sp_submod) != 0) {
+        fprintf(stderr, "PyImport_AppendInittab() failed\n");
+        return 1;
+    }
+    PyConfig_InitPythonConfig(&config);
+    config.isolated = 1;
+    config_set_argv(&config, Py_ARRAY_LENGTH(argv), argv);
+    init_from_config_clear(&config);
+    return Py_RunMain();
+}
+
 static void wrap_allocator(PyMemAllocatorEx *allocator);
 static void unwrap_allocator(PyMemAllocatorEx *allocator);
 
@@ -2507,6 +2678,8 @@ static struct TestCase TestCases[] = {
     {"test_get_incomplete_frame", test_get_incomplete_frame},
     {"test_gilstate_after_finalization", test_gilstate_after_finalization},
     {"test_create_module_from_initfunc", test_create_module_from_initfunc},
+    {"test_inittab_submodule_multiphase", test_inittab_submodule_multiphase},
+    {"test_inittab_submodule_singlephase", test_inittab_submodule_singlephase},
     {NULL, NULL}
 };
 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 12ee506e4f2..4ba255d28bd 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -1976,14 +1976,8 @@ dummy_func(
         }
 
         inst(BUILD_STRING, (pieces[oparg] -- str)) {
-            STACKREFS_TO_PYOBJECTS(pieces, oparg, pieces_o);
-            if (CONVERSION_FAILED(pieces_o)) {
DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *str_o = _PyUnicode_JoinArray(&_Py_STR(empty), pieces_o, oparg); - STACKREFS_TO_PYOBJECTS_CLEANUP(pieces_o); - DECREF_INPUTS(); + PyObject *str_o = _Py_BuildString_StackRefSteal(pieces, oparg); + DEAD(pieces); ERROR_IF(str_o == NULL); str = PyStackRef_FromPyObjectSteal(str_o); } @@ -2098,17 +2092,9 @@ dummy_func( } inst(BUILD_MAP, (values[oparg*2] -- map)) { - STACKREFS_TO_PYOBJECTS(values, oparg*2, values_o); - if (CONVERSION_FAILED(values_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *map_o = _PyDict_FromItems( - values_o, 2, - values_o+1, 2, - oparg); - STACKREFS_TO_PYOBJECTS_CLEANUP(values_o); - DECREF_INPUTS(); + + PyObject *map_o = _Py_BuildMap_StackRefSteal(values, oparg); + DEAD(values); ERROR_IF(map_o == NULL); map = PyStackRef_FromPyObjectStealMortal(map_o); } @@ -3891,27 +3877,20 @@ dummy_func( #if TIER_ONE assert(opcode != INSTRUMENTED_CALL); #endif - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } - /* Callable is not a normal Python function */ - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *res_o = PyObject_Vectorcall( - callable_o, args_o, - total_args | PY_VECTORCALL_ARGUMENTS_OFFSET, - NULL); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + PyObject *res_o = _Py_VectorCall_StackRefSteal( + callable, + arguments, + total_args, + PyStackRef_NULL); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4186,14 +4165,13 @@ dummy_func( } DEOPT_IF(tp->tp_vectorcall == NULL); STAT_INC(CALL, hit); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *res_o = tp->tp_vectorcall((PyObject *)tp, args_o, total_args, NULL); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - DECREF_INPUTS(); + PyObject *res_o = _Py_CallBuiltinClass_StackRefSteal( + callable, + arguments, + total_args); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4241,31 +4219,24 @@ dummy_func( op(_CALL_BUILTIN_FAST, (callable, self_or_null, args[oparg] -- res)) { /* Builtin METH_FASTCALL functions, without keywords */ - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); DEOPT_IF(!PyCFunction_CheckExact(callable_o)); DEOPT_IF(PyCFunction_GET_FLAGS(callable_o) != METH_FASTCALL); STAT_INC(CALL, hit); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); - /* res = func(self, args, nargs) */ - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *res_o = _PyCFunctionFast_CAST(cfunc)( - PyCFunction_GET_SELF(callable_o), - args_o, - total_args); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + PyObject *res_o = _Py_BuiltinCallFast_StackRefSteal( + callable, + arguments, + total_args + ); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o 
== NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4278,30 +4249,20 @@ dummy_func( op(_CALL_BUILTIN_FAST_WITH_KEYWORDS, (callable, self_or_null, args[oparg] -- res)) { /* Builtin METH_FASTCALL | METH_KEYWORDS functions */ - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); DEOPT_IF(!PyCFunction_CheckExact(callable_o)); DEOPT_IF(PyCFunction_GET_FLAGS(callable_o) != (METH_FASTCALL | METH_KEYWORDS)); STAT_INC(CALL, hit); - /* res = func(self, arguments, nargs, kwnames) */ - PyCFunctionFastWithKeywords cfunc = - _PyCFunctionFastWithKeywords_CAST(PyCFunction_GET_FUNCTION(callable_o)); - - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *res_o = cfunc(PyCFunction_GET_SELF(callable_o), args_o, total_args, NULL); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4468,19 +4429,16 @@ dummy_func( assert(self != NULL); EXIT_IF(!Py_IS_TYPE(self, d_type)); STAT_INC(CALL, hit); - int nargs = total_args - 1; - - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyCFunctionFastWithKeywords cfunc = - _PyCFunctionFastWithKeywords_CAST(meth->ml_meth); - PyObject *res_o = cfunc(self, (args_o + 1), nargs, NULL); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + PyObject *res_o = _PyCallMethodDescriptorFastWithKeywords_StackRefSteal( + callable, + meth, + self, + arguments, + total_args + ); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4548,18 +4506,16 @@ dummy_func( assert(self != NULL); EXIT_IF(!Py_IS_TYPE(self, method->d_common.d_type)); STAT_INC(CALL, hit); - int nargs = total_args - 1; - - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyCFunctionFast cfunc = _PyCFunctionFast_CAST(meth->ml_meth); - PyObject *res_o = cfunc(self, (args_o + 1), nargs); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + PyObject *res_o = _PyCallMethodDescriptorFast_StackRefSteal( + callable, + meth, + self, + arguments, + total_args + ); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -4792,30 +4748,21 @@ dummy_func( #if TIER_ONE assert(opcode != INSTRUMENTED_CALL); #endif - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); - int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } - /* Callable is not a normal Python function */ - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - DECREF_INPUTS(); - ERROR_IF(true); - } - PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); - int positional_args = total_args - 
(int)PyTuple_GET_SIZE(kwnames_o); - PyObject *res_o = PyObject_Vectorcall( - callable_o, args_o, - positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, - kwnames_o); - PyStackRef_CLOSE(kwnames); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - DECREF_INPUTS(); + PyObject *res_o = _Py_VectorCall_StackRefSteal( + callable, + arguments, + total_args, + kwnames); + DEAD(kwnames); + DEAD(args); + DEAD(self_or_null); + DEAD(callable); ERROR_IF(res_o == NULL); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -5650,7 +5597,7 @@ dummy_func( LEAVE_TRACING(); int err = stop_tracing_and_jit(tstate, frame); ERROR_IF(err < 0); - DISPATCH_GOTO_NON_TRACING(); + DISPATCH(); } // Super instructions. Instruction deopted. There's a mismatch in what the stack expects // in the optimizer. So we have to reflect in the trace correctly. diff --git a/Python/ceval.c b/Python/ceval.c index 14fef42ea96..a1d54bd058b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -362,9 +362,11 @@ _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count) _Py_InitializeRecursionLimits(tstate); } #if _Py_STACK_GROWS_DOWN - return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES; + return here_addr <= _tstate->c_stack_soft_limit + margin_count * _PyOS_STACK_MARGIN_BYTES && + here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; #else - return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES; + return here_addr > _tstate->c_stack_soft_limit - margin_count * _PyOS_STACK_MARGIN_BYTES && + here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; #endif } @@ -455,7 +457,7 @@ int pthread_attr_destroy(pthread_attr_t *a) #endif static void -hardware_stack_limits(uintptr_t *base, uintptr_t *top) +hardware_stack_limits(uintptr_t *base, uintptr_t *top, uintptr_t sp) { #ifdef WIN32 ULONG_PTR low, high; @@ -491,10 +493,19 @@ hardware_stack_limits(uintptr_t *base, uintptr_t *top) return; } # endif - uintptr_t here_addr = _Py_get_machine_stack_pointer(); - uintptr_t top_addr = _Py_SIZE_ROUND_UP(here_addr, 4096); + // Add some space for the caller's frame, then round to the minimum page size. + // This is a guess at the top of the stack, but it should be a reasonably + // good guess if called from _PyThreadState_Attach when creating a thread. + // If the thread is attached deep in a call stack, then the guess will be poor. +#if _Py_STACK_GROWS_DOWN + uintptr_t top_addr = _Py_SIZE_ROUND_UP(sp + 8*sizeof(void*), SYSTEM_PAGE_SIZE); *top = top_addr; *base = top_addr - Py_C_STACK_SIZE; +# else + uintptr_t base_addr = _Py_SIZE_ROUND_DOWN(sp - 8*sizeof(void*), SYSTEM_PAGE_SIZE); + *base = base_addr; + *top = base_addr + Py_C_STACK_SIZE; +#endif #endif } @@ -543,7 +554,8 @@ void _Py_InitializeRecursionLimits(PyThreadState *tstate) { uintptr_t base, top; - hardware_stack_limits(&base, &top); + uintptr_t here_addr = _Py_get_machine_stack_pointer(); + hardware_stack_limits(&base, &top, here_addr); assert(top != 0); tstate_set_stack(tstate, base, top); @@ -587,7 +599,7 @@ PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() - if the recursion_depth reaches recursion_limit. */ + if the stack pointer is between the stack base and c_stack_hard_limit.
*/ int _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) { @@ -596,10 +608,12 @@ _Py_CheckRecursiveCall(PyThreadState *tstate, const char *where) assert(_tstate->c_stack_soft_limit != 0); assert(_tstate->c_stack_hard_limit != 0); #if _Py_STACK_GROWS_DOWN + assert(here_addr >= _tstate->c_stack_hard_limit - _PyOS_STACK_MARGIN_BYTES); if (here_addr < _tstate->c_stack_hard_limit) { /* Overflowing while handling an overflow. Give up. */ int kbytes_used = (int)(_tstate->c_stack_top - here_addr)/1024; #else + assert(here_addr <= _tstate->c_stack_hard_limit + _PyOS_STACK_MARGIN_BYTES); if (here_addr > _tstate->c_stack_hard_limit) { /* Overflowing while handling an overflow. Give up. */ int kbytes_used = (int)(here_addr - _tstate->c_stack_top)/1024; @@ -1003,6 +1017,283 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) #include "ceval_macros.h" + +/* Helper functions to keep the size of the largest uops down */ + +PyObject * +_Py_VectorCall_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args, + _PyStackRef kwnames) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args; + if (kwnames_o != NULL) { + positional_args -= (int)PyTuple_GET_SIZE(kwnames_o); + } + res = PyObject_Vectorcall( + callable_o, args_o, + positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, + kwnames_o); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + PyStackRef_XCLOSE(kwnames); + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. + for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + PyStackRef_CLOSE(callable); + return res; +} + +PyObject * +_Py_BuiltinCallFast_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); + res = _PyCFunctionFast_CAST(cfunc)( + PyCFunction_GET_SELF(callable_o), + args_o, + total_args + ); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. 
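+    // Writing PyStackRef_NULL into each slot *before* closing it ensures
+    // that a collection triggered from inside PyStackRef_CLOSE never
+    // observes a stale reference in the frame's value stack.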
+ for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + PyStackRef_CLOSE(callable); + return res; +} + +PyObject * +_Py_BuiltinCallFastWithKeywords_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyCFunctionFastWithKeywords cfunc = + _PyCFunctionFastWithKeywords_CAST(PyCFunction_GET_FUNCTION(callable_o)); + res = cfunc(PyCFunction_GET_SELF(callable_o), args_o, total_args, NULL); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. + for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + PyStackRef_CLOSE(callable); + return res; +} + +PyObject * +_PyCallMethodDescriptorFast_StackRefSteal( + _PyStackRef callable, + PyMethodDef *meth, + PyObject *self, + _PyStackRef *arguments, + int total_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + assert(((PyMethodDescrObject *)PyStackRef_AsPyObjectBorrow(callable))->d_method == meth); + assert(self == PyStackRef_AsPyObjectBorrow(arguments[0])); + + PyCFunctionFast cfunc = _PyCFunctionFast_CAST(meth->ml_meth); + res = cfunc(self, (args_o + 1), total_args - 1); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. + for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + PyStackRef_CLOSE(callable); + return res; +} + +PyObject * +_PyCallMethodDescriptorFastWithKeywords_StackRefSteal( + _PyStackRef callable, + PyMethodDef *meth, + PyObject *self, + _PyStackRef *arguments, + int total_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + assert(((PyMethodDescrObject *)PyStackRef_AsPyObjectBorrow(callable))->d_method == meth); + assert(self == PyStackRef_AsPyObjectBorrow(arguments[0])); + + PyCFunctionFastWithKeywords cfunc = + _PyCFunctionFastWithKeywords_CAST(meth->ml_meth); + res = cfunc(self, (args_o + 1), total_args-1, NULL); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. 
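+    // Note that arguments[0] is self (asserted above), which is why the
+    // call uses args_o + 1 and total_args - 1; self is nevertheless
+    // closed here like any other slot.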
+ for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + PyStackRef_CLOSE(callable); + return res; +} + +PyObject * +_Py_CallBuiltinClass_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + PyTypeObject *tp = (PyTypeObject *)PyStackRef_AsPyObjectBorrow(callable); + res = tp->tp_vectorcall((PyObject *)tp, args_o, total_args, NULL); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. + for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + PyStackRef_CLOSE(callable); + return res; +} + +PyObject * +_Py_BuildString_StackRefSteal( + _PyStackRef *arguments, + int total_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + res = _PyUnicode_JoinArray(&_Py_STR(empty), args_o, total_args); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. + for (int i = total_args-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + return res; +} + +PyObject * +_Py_BuildMap_StackRefSteal( + _PyStackRef *arguments, + int half_args) +{ + PyObject *res; + STACKREFS_TO_PYOBJECTS(arguments, half_args*2, args_o); + if (CONVERSION_FAILED(args_o)) { + res = NULL; + goto cleanup; + } + res = _PyDict_FromItems( + args_o, 2, + args_o+1, 2, + half_args + ); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res != NULL) ^ (PyErr_Occurred() != NULL)); +cleanup: + // arguments is a pointer into the GC visible stack, + // so we must NULL out values as we clear them. 
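+    // There are 2*half_args entries to clear: BUILD_MAP's oparg counts
+    // key/value pairs, interleaved on the stack as [k0, v0, k1, v1, ...],
+    // which _PyDict_FromItems reads with a stride of 2.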
+ for (int i = half_args*2-1; i >= 0; i--) { + _PyStackRef tmp = arguments[i]; + arguments[i] = PyStackRef_NULL; + PyStackRef_CLOSE(tmp); + } + return res; +} + +#ifdef Py_DEBUG +void +_Py_assert_within_stack_bounds( + _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, + const char *filename, int lineno +) { + if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { + return; + } + int level = (int)(stack_pointer - _PyFrame_Stackbase(frame)); + if (level < 0) { + printf("Stack underflow (depth = %d) at %s:%d\n", level, filename, lineno); + fflush(stdout); + abort(); + } + int size = _PyFrame_GetCode(frame)->co_stacksize; + if (level > size) { + printf("Stack overflow (depth = %d) at %s:%d\n", level, filename, lineno); + fflush(stdout); + abort(); + } +} +#endif + int _Py_CheckRecursiveCallPy( PyThreadState *tstate) { @@ -1064,11 +1355,12 @@ _PyObjectArray_FromStackRefArray(_PyStackRef *input, Py_ssize_t nargs, PyObject if (result == NULL) { return NULL; } - result++; } else { result = scratch; } + result++; + result[0] = NULL; /* Keep GCC happy */ for (int i = 0; i < nargs; i++) { result[i] = PyStackRef_AsPyObjectBorrow(input[i]); } @@ -1083,6 +1375,12 @@ _PyObjectArray_Free(PyObject **array, PyObject **scratch) } } +#ifdef Py_DEBUG +#define ASSERT_WITHIN_STACK_BOUNDS(F, L) _Py_assert_within_stack_bounds(frame, stack_pointer, (F), (L)) +#else +#define ASSERT_WITHIN_STACK_BOUNDS(F, L) (void)0 +#endif + #if _Py_TIER2 // 0 for success, -1 for error. static int @@ -1990,6 +2288,16 @@ clear_gen_frame(PyThreadState *tstate, _PyInterpreterFrame * frame) void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame * frame) { + // Update last_profiled_frame for remote profiler frame caching. + // By this point, tstate->current_frame is already set to the parent frame. + // Only update if we're popping the exact frame that was last profiled. + // This avoids corrupting the cache when transient frames (called and returned + // between profiler samples) update last_profiled_frame to addresses the + // profiler never saw. 
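+    // Concretely: if the frame being popped is exactly the one the profiler
+    // last recorded, redirect the cache to its parent (current_frame); a
+    // frame created and destroyed entirely between two samples never
+    // matches and therefore never clobbers the cached address.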
+ if (tstate->last_profiled_frame != NULL && tstate->last_profiled_frame == frame) { + tstate->last_profiled_frame = tstate->current_frame; + } + if (frame->owner == FRAME_OWNED_BY_THREAD) { clear_thread_frame(tstate, frame); } @@ -2046,7 +2354,7 @@ _PyEvalFramePushAndInit_Ex(PyThreadState *tstate, _PyStackRef func, PyObject *kwnames = NULL; _PyStackRef *newargs; PyObject *const *object_array = NULL; - _PyStackRef stack_array[8]; + _PyStackRef stack_array[8] = {0}; if (has_dict) { object_array = _PyStack_UnpackDict(tstate, _PyTuple_ITEMS(callargs), nargs, kwargs, &kwnames); if (object_array == NULL) { @@ -2109,7 +2417,7 @@ _PyEval_Vector(PyThreadState *tstate, PyFunctionObject *func, if (kwnames) { total_args += PyTuple_GET_SIZE(kwnames); } - _PyStackRef stack_array[8]; + _PyStackRef stack_array[8] = {0}; _PyStackRef *arguments; if (total_args <= 8) { arguments = stack_array; @@ -2814,12 +3122,6 @@ _PyEval_GetBuiltin(PyObject *name) return attr; } -PyObject * -_PyEval_GetBuiltinId(_Py_Identifier *name) -{ - return _PyEval_GetBuiltin(_PyUnicode_FromId(name)); -} - PyObject * PyEval_GetLocals(void) { @@ -3050,6 +3352,9 @@ PyEval_MergeCompilerFlags(PyCompilerFlags *cf) { PyThreadState *tstate = _PyThreadState_GET(); _PyInterpreterFrame *current_frame = tstate->current_frame; + if (current_frame == tstate->base_frame) { + current_frame = NULL; + } int result = cf->cf_flags != 0; if (current_frame != NULL) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index c30638c221a..edf8fc9a57d 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -458,7 +458,7 @@ do { \ #define STACKREFS_TO_PYOBJECTS(ARGS, ARG_COUNT, NAME) \ /* +1 because vectorcall might use -1 to write self */ \ PyObject *NAME##_temp[MAX_STACKREF_SCRATCH+1]; \ - PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp + 1); + PyObject **NAME = _PyObjectArray_FromStackRefArray(ARGS, ARG_COUNT, NAME##_temp); #define STACKREFS_TO_PYOBJECTS_CLEANUP(NAME) \ /* +1 because we +1 previously */ \ diff --git a/Python/critical_section.c b/Python/critical_section.c index 218b580e951..2c2152f5de4 100644 --- a/Python/critical_section.c +++ b/Python/critical_section.c @@ -17,10 +17,9 @@ untag_critical_section(uintptr_t tag) #endif void -_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m) { #ifdef Py_GIL_DISABLED - PyThreadState *tstate = _PyThreadState_GET(); // As an optimisation for locking the same object recursively, skip // locking if the mutex is currently locked by the top-most critical // section. 
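    // (For example, a nested critical section begun on a mutex that the
    // current thread already holds through its innermost active section
    // can reuse that lock rather than re-acquiring it.)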
@@ -53,11 +52,10 @@ _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m) } void -_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked) { #ifdef Py_GIL_DISABLED - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_base._cs_mutex = NULL; c->_cs_mutex2 = NULL; c->_cs_base._cs_prev = tstate->critical_section; @@ -139,7 +137,7 @@ void PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) { #ifdef Py_GIL_DISABLED - _PyCriticalSection_Begin(c, op); + _PyCriticalSection_Begin(_PyThreadState_GET(), c, op); #endif } @@ -148,7 +146,7 @@ void PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) { #ifdef Py_GIL_DISABLED - _PyCriticalSection_BeginMutex(c, m); + _PyCriticalSection_BeginMutex(_PyThreadState_GET(), c, m); #endif } @@ -157,7 +155,7 @@ void PyCriticalSection_End(PyCriticalSection *c) { #ifdef Py_GIL_DISABLED - _PyCriticalSection_End(c); + _PyCriticalSection_End(_PyThreadState_GET(), c); #endif } @@ -166,7 +164,7 @@ void PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) { #ifdef Py_GIL_DISABLED - _PyCriticalSection2_Begin(c, a, b); + _PyCriticalSection2_Begin(_PyThreadState_GET(), c, a, b); #endif } @@ -175,7 +173,7 @@ void PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { #ifdef Py_GIL_DISABLED - _PyCriticalSection2_BeginMutex(c, m1, m2); + _PyCriticalSection2_BeginMutex(_PyThreadState_GET(), c, m1, m2); #endif } @@ -184,6 +182,6 @@ void PyCriticalSection2_End(PyCriticalSection2 *c) { #ifdef Py_GIL_DISABLED - _PyCriticalSection2_End(c); + _PyCriticalSection2_End(_PyThreadState_GET(), c); #endif } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e1edd20b778..7273a87681b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -84,7 +84,7 @@ value = PyStackRef_DUP(value_s); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -96,7 +96,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -108,7 +108,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -120,7 +120,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -132,7 +132,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -144,7 +144,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -156,7 +156,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -168,7 +168,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -180,7 +180,7 @@ value = 
PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -191,7 +191,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -203,7 +203,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -215,7 +215,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -227,7 +227,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -239,7 +239,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -251,7 +251,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -263,7 +263,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -275,7 +275,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -287,7 +287,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -298,7 +298,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -309,7 +309,7 @@ GETLOCAL(oparg) = PyStackRef_NULL; stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -320,7 +320,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -333,7 +333,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -346,7 +346,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -359,7 +359,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -372,7 +372,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -384,7 +384,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -396,7 +396,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -411,7 +411,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -426,7 +426,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -441,7 +441,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -456,7 +456,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -471,7 +471,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -486,7 +486,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -501,7 +501,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -515,7 +515,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -526,7 +526,7 @@ _PyStackRef value; value = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -539,7 +539,7 @@ assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -549,7 +549,7 @@ assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); 
+ ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -559,7 +559,7 @@ assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -569,7 +569,7 @@ assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -579,12 +579,12 @@ tos = stack_pointer[-1]; nos = stack_pointer[-2]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(tos); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(nos); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -596,7 +596,7 @@ res = PyStackRef_NULL; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -604,7 +604,7 @@ _PyStackRef value; value = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -618,7 +618,7 @@ iter = stack_pointer[-2]; (void)index_or_null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iter); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -634,7 +634,7 @@ val = value; stack_pointer[-2] = val; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(receiver); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -649,7 +649,7 @@ PyObject *res_o = PyNumber_Negative(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -659,7 +659,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -682,7 +682,7 @@ int err = PyObject_IsTrue(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -692,7 +692,7 @@ res = err ? 
PyStackRef_True : PyStackRef_False; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -723,7 +723,7 @@ } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -833,7 +833,7 @@ else { assert(Py_SIZE(value_o)); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -849,14 +849,14 @@ _PyStackRef res; value = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_True; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -868,7 +868,7 @@ PyObject *res_o = PyNumber_Invert(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -878,7 +878,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -949,7 +949,7 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -974,7 +974,7 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -999,7 +999,7 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1043,12 +1043,12 @@ if (PyStackRef_IsNull(res)) { stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1070,12 +1070,12 @@ if (PyStackRef_IsNull(res)) { stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1097,12 +1097,12 @@ if (PyStackRef_IsNull(res)) { stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1124,12 +1124,12 @@ if (PyStackRef_IsNull(res)) { stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1151,12 +1151,12 @@ if (PyStackRef_IsNull(res)) { stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1178,12 +1178,12 @@ if (PyStackRef_IsNull(res)) { stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1203,13 +1203,13 @@ PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); if (res_o == NULL) { stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1240,7 +1240,7 @@ PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyUnicode_Append(&temp, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1304,11 +1304,11 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1330,7 +1330,7 @@ } else { stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); res_o = PyObject_GetItem(PyStackRef_AsPyObjectBorrow(container), slice); Py_DECREF(slice); @@ -1338,7 +1338,7 @@ stack_pointer += 2; } stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(container); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1348,7 +1348,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1371,7 +1371,7 @@ } else { stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); err = PyObject_SetItem(PyStackRef_AsPyObjectBorrow(container), slice, PyStackRef_AsPyObjectBorrow(v)); Py_DECREF(slice); @@ -1389,7 +1389,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -4; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_ERROR(); } @@ -1443,7 +1443,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1472,14 +1472,14 
@@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1511,14 +1511,14 @@ PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str_st); stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectBorrow(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1569,7 +1569,7 @@ PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); res = PyStackRef_FromPyObjectNew(res_o); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef tmp = tuple_st; tuple_st = res; @@ -1631,14 +1631,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (rc <= 0) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1673,7 +1673,7 @@ STAT_INC(BINARY_OP, hit); stack_pointer[0] = getitem; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1692,7 +1692,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-3] = new_frame; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1706,11 +1706,11 @@ PyStackRef_AsPyObjectSteal(v)); if (err < 0) { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1726,11 +1726,11 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (err) { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1757,7 +1757,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_ERROR(); } @@ -1799,7 +1799,7 @@ UNLOCK_OBJECT(list); PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(list_st); Py_DECREF(old_value); @@ -1823,7 +1823,7 @@ PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(dict_st); stack_pointer = 
_PyFrame_GetStackPointer(frame); @@ -1851,7 +1851,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_ERROR(); } @@ -1868,7 +1868,7 @@ PyObject *res_o = _PyIntrinsics_UnaryFunctions[oparg].func(tstate, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1878,7 +1878,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1904,14 +1904,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1922,7 +1922,7 @@ assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(STACK_LEVEL() == 0); _Py_LeaveRecursiveCallPy(tstate); @@ -1935,7 +1935,7 @@ LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1958,7 +1958,7 @@ type->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(obj); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1968,7 +1968,7 @@ iter_o = (*getter)(obj_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(obj); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1989,7 +1989,7 @@ iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[0] = iter; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2006,7 +2006,7 @@ awaitable = PyStackRef_FromPyObjectSteal(awaitable_o); stack_pointer[0] = awaitable; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2019,7 +2019,7 @@ PyObject *iter_o = _PyEval_GetAwaitable(PyStackRef_AsPyObjectBorrow(iterable), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2029,7 +2029,7 @@ iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[0] = iter; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2078,7 +2078,7 @@ gen->gi_frame_state = FRAME_SUSPENDED + oparg; _PyStackRef temp = retval; 
stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); tstate->exc_info = gen->gi_exc_state.previous_item; gen->gi_exc_state.previous_item = NULL; @@ -2101,7 +2101,7 @@ LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2115,7 +2115,7 @@ ? NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2126,7 +2126,7 @@ value = PyStackRef_FromPyObjectNew(tstate->interp->common_consts[oparg]); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2149,7 +2149,7 @@ bc = PyStackRef_FromPyObjectSteal(bc_o); stack_pointer[0] = bc; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2166,7 +2166,7 @@ "no locals found when storing %R", name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2183,7 +2183,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2227,7 +2227,7 @@ top = &stack_pointer[-1 + oparg]; PyObject *seq_o = PyStackRef_AsPyObjectSteal(seq); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int res = _PyEval_UnpackIterableStackRef(tstate, seq_o, oparg, -1, top); Py_DECREF(seq_o); @@ -2236,7 +2236,7 @@ JUMP_TO_ERROR(); } stack_pointer += oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2259,7 +2259,7 @@ stack_pointer[-1] = val1; stack_pointer[0] = val0; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(seq); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2284,7 +2284,7 @@ *values++ = PyStackRef_FromPyObjectNew(items[i]); } stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(seq); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2317,7 +2317,7 @@ } UNLOCK_OBJECT(seq_o); stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(seq); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2332,7 +2332,7 @@ top = &stack_pointer[(oparg & 0xFF) + (oparg >> 8)]; PyObject *seq_o = PyStackRef_AsPyObjectSteal(seq); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int res = _PyEval_UnpackIterableStackRef(tstate, seq_o, oparg & 0xFF, oparg >> 8, top); Py_DECREF(seq_o); @@ -2341,7 +2341,7 @@ JUMP_TO_ERROR(); } stack_pointer += 1 
+ (oparg & 0xFF) + (oparg >> 8); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2365,7 +2365,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_ERROR(); } @@ -2381,7 +2381,7 @@ int err = PyObject_DelAttr(PyStackRef_AsPyObjectBorrow(owner), name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2400,7 +2400,7 @@ int err = PyDict_SetItem(GLOBALS(), name, PyStackRef_AsPyObjectBorrow(v)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2442,7 +2442,7 @@ locals = PyStackRef_FromPyObjectNew(l); stack_pointer[0] = locals; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2461,7 +2461,7 @@ v = PyStackRef_FromPyObjectSteal(v_o); stack_pointer[0] = v; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2477,7 +2477,7 @@ JUMP_TO_ERROR(); } stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2489,7 +2489,7 @@ null[0] = PyStackRef_NULL; } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2543,7 +2543,7 @@ STAT_INC(LOAD_GLOBAL, hit); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2580,7 +2580,7 @@ STAT_INC(LOAD_GLOBAL, hit); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2663,14 +2663,14 @@ } } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(class_dict_st); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2684,7 +2684,7 @@ if (PyStackRef_IsNull(value)) { stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2692,7 +2692,7 @@ } stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2705,7 +2705,7 @@ PyCell_SetTakeRef(cell, PyStackRef_AsPyObjectSteal(v)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2729,39 +2729,18 @@ _PyStackRef str; oparg = CURRENT_OPARG(); pieces = &stack_pointer[-oparg]; - STACKREFS_TO_PYOBJECTS(pieces, oparg, pieces_o); - if 
(CONVERSION_FAILED(pieces_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = pieces[_i]; - pieces[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - PyObject *str_o = _PyUnicode_JoinArray(&_Py_STR(empty), pieces_o, oparg); - STACKREFS_TO_PYOBJECTS_CLEANUP(pieces_o); _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = pieces[_i]; - pieces[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } + PyObject *str_o = _Py_BuildString_StackRefSteal(pieces, oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); if (str_o == NULL) { + stack_pointer += -oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } str = PyStackRef_FromPyObjectSteal(str_o); - stack_pointer[0] = str; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-oparg] = str; + stack_pointer += 1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2789,7 +2768,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (oparg & 1) { stack_pointer += -(oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(format[0]); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2798,12 +2777,12 @@ stack_pointer += -(oparg & 1); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2813,7 +2792,7 @@ interpolation = PyStackRef_FromPyObjectSteal(interpolation_o); stack_pointer[0] = interpolation; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2829,12 +2808,12 @@ PyObject *template_o = _PyTemplate_Build(strings_o, interpolations_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(interpolations); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(strings); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2844,7 +2823,7 @@ template = PyStackRef_FromPyObjectSteal(template_o); stack_pointer[0] = template; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2860,7 +2839,7 @@ tup = PyStackRef_FromPyObjectStealMortal(tup_o); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2878,7 +2857,7 @@ list = PyStackRef_FromPyObjectStealMortal(list_o); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2908,7 +2887,7 @@ 
stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2916,7 +2895,7 @@ } assert(Py_IsNone(none_val)); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2934,7 +2913,7 @@ PyStackRef_AsPyObjectBorrow(iterable)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2962,7 +2941,7 @@ } stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } int err = 0; @@ -2982,7 +2961,7 @@ } if (err) { stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); Py_DECREF(set_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2991,7 +2970,7 @@ set = PyStackRef_FromPyObjectStealMortal(set_o); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3000,44 +2979,18 @@ _PyStackRef map; oparg = CURRENT_OPARG(); values = &stack_pointer[-oparg*2]; - STACKREFS_TO_PYOBJECTS(values, oparg*2, values_o); - if (CONVERSION_FAILED(values_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg*2; --_i >= 0;) { - tmp = values[_i]; - values[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg*2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *map_o = _PyDict_FromItems( - values_o, 2, - values_o+1, 2, - oparg); + PyObject *map_o = _Py_BuildMap_StackRefSteal(values, oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(values_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg*2; --_i >= 0;) { - tmp = values[_i]; - values[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg*2; - assert(WITHIN_STACK_BOUNDS()); if (map_o == NULL) { + stack_pointer += -oparg*2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } map = PyStackRef_FromPyObjectStealMortal(map_o); - stack_pointer[0] = map; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-oparg*2] = map; + stack_pointer += 1 - oparg*2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3103,14 +3056,14 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); 
PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3136,14 +3089,14 @@ _PyEval_FormatKwargsError(tstate, callable_o, update_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_ERROR(); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3169,11 +3122,11 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (err != 0) { stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3216,14 +3169,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (attr == NULL) { JUMP_TO_ERROR(); } attr_st = PyStackRef_FromPyObjectSteal(attr); stack_pointer[0] = attr_st; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3264,7 +3217,7 @@ self_or_null = self_st; } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(self_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3272,7 +3225,7 @@ stack_pointer += 1; } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef tmp = global_super_st; global_super_st = self_or_null; @@ -3284,12 +3237,12 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); attr = PyStackRef_FromPyObjectSteal(attr_o); stack_pointer[0] = attr; stack_pointer[1] = self_or_null; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3313,7 +3266,7 @@ } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3329,7 +3282,7 @@ PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3340,7 +3293,7 @@ stack_pointer += 1; } stack_pointer += (oparg&1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3672,7 +3625,7 @@ } UNLOCK_OBJECT(owner_o); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); Py_XDECREF(old_value); @@ -3731,7 +3684,7 @@ UNLOCK_OBJECT(dict); STAT_INC(STORE_ATTR, hit); stack_pointer += -2; - 
assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); Py_XDECREF(old_value); @@ -3756,7 +3709,7 @@ FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); Py_XDECREF(old_value); @@ -3786,7 +3739,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_ERROR(); } @@ -3805,7 +3758,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3827,7 +3780,7 @@ res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3853,7 +3806,7 @@ res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3877,7 +3830,7 @@ res = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? PyStackRef_True : PyStackRef_False; stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3900,11 +3853,11 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); b = res ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3929,14 +3882,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_ERROR(); } b = (res ^ oparg) ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3974,14 +3927,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_ERROR(); } b = (res ^ oparg) ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4008,14 +3961,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_ERROR(); } b = (res ^ oparg) ? 
PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4043,7 +3996,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } PyObject *match_o = NULL; @@ -4061,7 +4014,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_ERROR(); } @@ -4079,7 +4032,7 @@ stack_pointer[0] = rest; stack_pointer[1] = match; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4102,14 +4055,14 @@ int res = PyErr_GivenExceptionMatches(left_o, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(right); stack_pointer = _PyFrame_GetStackPointer(frame); b = res ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4135,14 +4088,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4161,7 +4114,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4208,7 +4161,7 @@ len = PyStackRef_FromPyObjectSteal(len_o); stack_pointer[0] = len; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4241,7 +4194,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (attrs_o) { assert(PyTuple_CheckExact(attrs_o)); attrs = PyStackRef_FromPyObjectSteal(attrs_o); @@ -4254,7 +4207,7 @@ } stack_pointer[0] = attrs; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4266,7 +4219,7 @@ res = match ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4278,7 +4231,7 @@ res = match ? 
PyStackRef_True : PyStackRef_False; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4298,7 +4251,7 @@ values_or_none = PyStackRef_FromPyObjectSteal(values_or_none_o); stack_pointer[0] = values_or_none; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4323,7 +4276,7 @@ PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4337,7 +4290,7 @@ stack_pointer[-1] = iter; stack_pointer[0] = index_or_null; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4403,7 +4356,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4476,7 +4429,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4525,7 +4478,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4580,7 +4533,7 @@ next = PyStackRef_FromPyObjectSteal(res); stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4616,7 +4569,7 @@ gen_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[0] = gen_frame; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4628,7 +4581,7 @@ method_and_self[1] = self; method_and_self[0] = PyStackRef_NULL; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4693,7 +4646,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4715,7 +4668,7 @@ stack_pointer[-1] = prev_exc; stack_pointer[0] = new_exc; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4762,7 +4715,7 @@ stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4783,7 +4736,7 @@ stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4797,14 +4750,14 @@ STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); attr = PyStackRef_FromPyObjectNew(descr); stack_pointer[0] = attr; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4819,14 +4772,14 @@ STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); attr = PyStackRef_FromPyObjectNew(descr); stack_pointer[0] = attr; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4859,7 +4812,7 @@ stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -4916,14 +4869,14 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { JUMP_TO_ERROR(); } new_frame = PyStackRef_Wrap(temp); stack_pointer[0] = new_frame; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5034,68 +4987,28 @@ #if TIER_ONE assert(opcode != INSTRUMENTED_CALL); #endif - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PyObject_Vectorcall( - callable_o, args_o, - total_args | PY_VECTORCALL_ARGUMENTS_OFFSET, - NULL); + PyObject *res_o = _Py_VectorCall_StackRefSteal( + callable, + arguments, + total_args, + PyStackRef_NULL); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5204,7 +5117,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5229,7 +5142,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; 
- assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5254,7 +5167,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5279,7 +5192,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5304,7 +5217,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5328,7 +5241,7 @@ new_frame = PyStackRef_Wrap(pushed_frame); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5338,7 +5251,7 @@ assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -5409,7 +5322,7 @@ res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5445,7 +5358,7 @@ (void)callable; (void)null; stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5455,7 +5368,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5488,7 +5401,7 @@ (void)callable; (void)null; stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5498,7 +5411,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5572,7 +5485,7 @@ tstate, init, NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FrameClearAndPop(tstate, shim); @@ -5584,7 +5497,7 @@ init_frame = PyStackRef_Wrap(temp); stack_pointer[0] = init_frame; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5600,7 +5513,7 @@ JUMP_TO_ERROR(); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5630,57 +5543,21 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - 
_PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = tp->tp_vectorcall((PyObject *)tp, args_o, total_args, NULL); + PyObject *res_o = _Py_CallBuiltinClass_StackRefSteal( + callable, + arguments, + total_args); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5727,7 +5604,7 @@ PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5737,7 +5614,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5750,13 +5627,13 @@ args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); if (!PyCFunction_CheckExact(callable_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -5766,62 +5643,22 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } 
_PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyCFunctionFast_CAST(cfunc)( - PyCFunction_GET_SELF(callable_o), - args_o, - total_args); + PyObject *res_o = _Py_BuiltinCallFast_StackRefSteal( + callable, + arguments, + total_args + ); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5834,13 +5671,13 @@ args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); if (!PyCFunction_CheckExact(callable_o)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -5851,61 +5688,17 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - PyCFunctionFastWithKeywords cfunc = - _PyCFunctionFastWithKeywords_CAST(PyCFunction_GET_FUNCTION(callable_o)); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = cfunc(PyCFunction_GET_SELF(callable_o), args_o, total_args, NULL); - stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = 
_PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5944,19 +5737,19 @@ JUMP_TO_ERROR(); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -5993,17 +5786,17 @@ } (void)null; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(cls); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(instance); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6011,7 +5804,7 @@ assert((!PyStackRef_IsNull(res)) ^ (_PyErr_Occurred(tstate) != NULL)); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6045,12 +5838,12 @@ int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); UNLOCK_OBJECT(self_o); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(self); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6132,14 +5925,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6181,61 +5974,24 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); - int nargs = total_args - 1; - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - 
stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyCFunctionFastWithKeywords cfunc = - _PyCFunctionFastWithKeywords_CAST(meth->ml_meth); - PyObject *res_o = cfunc(self, (args_o + 1), nargs, NULL); + PyObject *res_o = _PyCallMethodDescriptorFastWithKeywords_StackRefSteal( + callable, + meth, + self, + arguments, + total_args + ); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6290,7 +6046,7 @@ PyStackRef_CLOSE(self_stackref); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6300,7 +6056,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6341,60 +6097,24 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); - int nargs = total_args - 1; - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyCFunctionFast cfunc = _PyCFunctionFast_CAST(meth->ml_meth); - PyObject *res_o = cfunc(self, (args_o + 1), nargs); + PyObject *res_o = _PyCallMethodDescriptorFast_StackRefSteal( + callable, + meth, + self, + arguments, + total_args + ); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { 
- tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6456,19 +6176,19 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { JUMP_TO_ERROR(); } new_frame = PyStackRef_Wrap(temp); stack_pointer[0] = new_frame; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6569,78 +6289,28 @@ #if TIER_ONE assert(opcode != INSTRUMENTED_CALL); #endif - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = kwnames; - kwnames = PyStackRef_NULL; - stack_pointer[-1] = kwnames; - PyStackRef_CLOSE(tmp); - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-2 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-3 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_ERROR(); - } - PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); - int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PyObject_Vectorcall( - callable_o, args_o, - positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, - kwnames_o); + PyObject *res_o = _Py_VectorCall_StackRefSteal( + callable, + arguments, + total_args, + kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(kwnames); - stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = 
PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -3 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-3 - oparg] = res; + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6686,7 +6356,7 @@ PyFunction_New(codeobj, GLOBALS()); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(codeobj_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6698,7 +6368,7 @@ func = PyStackRef_FromPyObjectSteal((PyObject *)func_obj); stack_pointer[0] = func; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6720,7 +6390,7 @@ *ptr = attr; stack_pointer[-2] = func_out; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6752,7 +6422,7 @@ LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6774,14 +6444,14 @@ } stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (slice_o == NULL) { JUMP_TO_ERROR(); } slice = PyStackRef_FromPyObjectStealMortal(slice_o); stack_pointer[0] = slice; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6797,7 +6467,7 @@ PyObject *result_o = conv_fn(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6807,7 +6477,7 @@ result = PyStackRef_FromPyObjectSteal(result_o); stack_pointer[0] = result; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6821,7 +6491,7 @@ PyObject *res_o = PyObject_Format(value_o, NULL); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6836,7 +6506,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6858,14 +6528,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6876,7 +6546,7 @@ top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6887,7 +6557,7 @@ top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6898,7 +6568,7 @@ top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6910,7 +6580,7 @@ top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -6942,7 +6612,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7009,7 +6679,7 @@ flag = stack_pointer[-1]; int is_true = PyStackRef_IsTrue(flag); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (!is_true) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -7022,7 +6692,7 @@ flag = stack_pointer[-1]; int is_false = PyStackRef_IsFalse(flag); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (!is_false) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -7036,7 +6706,7 @@ int is_none = PyStackRef_IsNone(val); if (!is_none) { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(val); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7046,7 +6716,7 @@ } } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7055,7 +6725,7 @@ val = stack_pointer[-1]; int is_none = PyStackRef_IsNone(val); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(val); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7162,7 +6832,7 @@ value = PyStackRef_FromPyObjectNew(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7172,14 +6842,14 @@ pop = stack_pointer[-1]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectNew(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7189,7 +6859,7 @@ value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7200,7 +6870,7 @@ callable = stack_pointer[-2]; (void)null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7215,13 +6885,13 @@ null = stack_pointer[-2]; callable = stack_pointer[-3]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop); 
stack_pointer = _PyFrame_GetStackPointer(frame); (void)null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7238,18 +6908,18 @@ null = stack_pointer[-3]; callable = stack_pointer[-4]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop2); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop1); stack_pointer = _PyFrame_GetStackPointer(frame); (void)null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7262,14 +6932,14 @@ pop = stack_pointer[-1]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7281,19 +6951,19 @@ pop1 = stack_pointer[-2]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop2); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop1); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7306,14 +6976,14 @@ PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); (void)null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7327,20 +6997,20 @@ callable = stack_pointer[-3]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop); stack_pointer = _PyFrame_GetStackPointer(frame); (void)null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7356,25 
+7026,25 @@ callable = stack_pointer[-4]; PyObject *ptr = (PyObject *)CURRENT_OPERAND0(); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop2); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(pop1); stack_pointer = _PyFrame_GetStackPointer(frame); (void)null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectBorrow(ptr); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7389,7 +7059,7 @@ stack_pointer[-1] = value; stack_pointer[0] = new; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -7404,7 +7074,7 @@ stack_pointer[-1] = value; stack_pointer[0] = new; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } diff --git a/Python/fileutils.c b/Python/fileutils.c index 93abd70a34d..0c1766b8804 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2118,7 +2118,6 @@ _Py_wrealpath(const wchar_t *path, wchar_t *resolved_path, size_t resolved_path_len) { char *cpath; - char cresolved_path[MAXPATHLEN]; wchar_t *wresolved_path; char *res; size_t r; @@ -2127,12 +2126,14 @@ _Py_wrealpath(const wchar_t *path, errno = EINVAL; return NULL; } - res = realpath(cpath, cresolved_path); + res = realpath(cpath, NULL); PyMem_RawFree(cpath); if (res == NULL) return NULL; - wresolved_path = Py_DecodeLocale(cresolved_path, &r); + wresolved_path = Py_DecodeLocale(res, &r); + free(res); + if (wresolved_path == NULL) { errno = EINVAL; return NULL; diff --git a/Python/gc.c b/Python/gc.c index 064f9406e0a..d067a6144b0 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -483,11 +483,12 @@ validate_consistent_old_space(PyGC_Head *head) /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 and * PREV_MASK_COLLECTING bit is set for all objects in containers. */ -static void +static Py_ssize_t update_refs(PyGC_Head *containers) { PyGC_Head *next; PyGC_Head *gc = GC_NEXT(containers); + Py_ssize_t candidates = 0; while (gc != containers) { next = GC_NEXT(gc); @@ -519,7 +520,9 @@ update_refs(PyGC_Head *containers) */ _PyObject_ASSERT(op, gc_get_refs(gc) != 0); gc = next; + candidates++; } + return candidates; } /* A traversal callback for subtract_refs. */ @@ -1240,7 +1243,7 @@ flag set but it does not clear it to skip unnecessary iteration. Before the flag is cleared (for example, by using 'clear_unreachable_mask' function or by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal list and we can not use most gc_list_* functions for it. */ -static inline void +static inline Py_ssize_t deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { validate_list(base, collecting_clear_unreachable_clear); /* Using ob_refcnt and gc_refs, calculate which objects in the @@ -1248,7 +1251,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { * refcount greater than 0 when all the references within the * set are taken into account). 
*/ - update_refs(base); // gc_prev is used for gc_refs + Py_ssize_t candidates = update_refs(base); // gc_prev is used for gc_refs subtract_refs(base); /* Leave everything reachable from outside base in base, and move @@ -1289,6 +1292,7 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { move_unreachable(base, unreachable); // gc_prev is pointer again validate_list(base, collecting_clear_unreachable_clear); validate_list(unreachable, collecting_set_unreachable_set); + return candidates; } /* Handle objects that may have resurrected after a call to 'finalize_garbage', moving @@ -1363,8 +1367,10 @@ gc_list_set_space(PyGC_Head *list, int space) static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) { + gcstate->generation_stats[gen].duration += stats->duration; gcstate->generation_stats[gen].collected += stats->collected; gcstate->generation_stats[gen].uncollectable += stats->uncollectable; + gcstate->generation_stats[gen].candidates += stats->candidates; gcstate->generation_stats[gen].collections += 1; } @@ -1387,7 +1393,6 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; - add_stats(gcstate, 0, stats); validate_spaces(gcstate); } @@ -1662,6 +1667,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; + stats->candidates += objects_marked; validate_spaces(gcstate); return; } @@ -1701,7 +1707,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) assert(gc_list_is_empty(&increment)); gcstate->work_to_do -= increment_size; - add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { completed_scavenge(gcstate); } @@ -1736,7 +1741,6 @@ gc_collect_full(PyThreadState *tstate, completed_scavenge(gcstate); _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); - add_stats(gcstate, 2, stats); } /* This is the main function. 
Read this to understand how the @@ -1756,7 +1760,7 @@ gc_collect_region(PyThreadState *tstate, assert(!_PyErr_Occurred(tstate)); gc_list_init(&unreachable); - deduce_unreachable(from, &unreachable); + stats->candidates = deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); untrack_tuples(from); @@ -1846,10 +1850,12 @@ do_gc_callback(GCState *gcstate, const char *phase, assert(PyList_CheckExact(gcstate->callbacks)); PyObject *info = NULL; if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsn}", + info = Py_BuildValue("{sisnsnsnsd}", "generation", generation, "collected", stats->collected, - "uncollectable", stats->uncollectable); + "uncollectable", stats->uncollectable, + "candidates", stats->candidates, + "duration", stats->duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while invoking gc callbacks"); return; @@ -2080,15 +2086,15 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) if (reason != _Py_GC_REASON_SHUTDOWN) { invoke_gc_callback(gcstate, "start", generation, &stats); } - PyTime_t t1; if (gcstate->debug & _PyGC_DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); - (void)PyTime_PerfCounterRaw(&t1); show_stats_each_generations(gcstate); } if (PyDTrace_GC_START_ENABLED()) { PyDTrace_GC_START(generation); } + PyTime_t start, stop; + (void)PyTime_PerfCounterRaw(&start); PyObject *exc = _PyErr_GetRaisedException(tstate); switch(generation) { case 0: @@ -2103,6 +2109,9 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) default: Py_UNREACHABLE(); } + (void)PyTime_PerfCounterRaw(&stop); + stats.duration = PyTime_AsSecondsDouble(stop - start); + add_stats(gcstate, generation, &stats); if (PyDTrace_GC_DONE_ENABLED()) { PyDTrace_GC_DONE(stats.uncollectable + stats.collected); } @@ -2124,12 +2133,9 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_atomic_store_int(&gcstate->collecting, 0); if (gcstate->debug & _PyGC_DEBUG_STATS) { - PyTime_t t2; - (void)PyTime_PerfCounterRaw(&t2); - double d = PyTime_AsSecondsDouble(t2 - t1); PySys_WriteStderr( "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", - stats.collected + stats.uncollectable, stats.uncollectable, d + stats.collected + stats.uncollectable, stats.uncollectable, stats.duration ); } @@ -2237,7 +2243,7 @@ _PyGC_Fini(PyInterpreterState *interp) void _PyGC_Dump(PyGC_Head *g) { - _PyObject_Dump(FROM_GC(g)); + PyUnstable_Object_Dump(FROM_GC(g)); } diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 7724676c242..e672e870db2 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -100,6 +100,7 @@ struct collection_state { int skip_deferred_objects; Py_ssize_t collected; Py_ssize_t uncollectable; + Py_ssize_t candidates; Py_ssize_t long_lived_total; struct worklist unreachable; struct worklist legacy_finalizers; @@ -975,15 +976,12 @@ static bool update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, void *block, size_t block_size, void *args) { + struct collection_state *state = (struct collection_state *)args; PyObject *op = op_from_block(block, args, false); if (op == NULL) { return true; } - if (gc_is_alive(op)) { - return true; - } - // Exclude immortal objects from garbage collection if (_Py_IsImmortal(op)) { op->ob_tid = 0; @@ -991,6 +989,11 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, gc_clear_unreachable(op); return true; } + // Marked objects count as candidates, 
immortals don't: + state->candidates++; + if (gc_is_alive(op)) { + return true; + } Py_ssize_t refcount = Py_REFCNT(op); if (_PyObject_HasDeferredRefcount(op)) { @@ -1911,7 +1914,8 @@ handle_resurrected_objects(struct collection_state *state) static void invoke_gc_callback(PyThreadState *tstate, const char *phase, int generation, Py_ssize_t collected, - Py_ssize_t uncollectable) + Py_ssize_t uncollectable, Py_ssize_t candidates, + double duration) { assert(!_PyErr_Occurred(tstate)); @@ -1925,10 +1929,12 @@ invoke_gc_callback(PyThreadState *tstate, const char *phase, assert(PyList_CheckExact(gcstate->callbacks)); PyObject *info = NULL; if (PyList_GET_SIZE(gcstate->callbacks) != 0) { - info = Py_BuildValue("{sisnsn}", + info = Py_BuildValue("{sisnsnsnsd}", "generation", generation, "collected", collected, - "uncollectable", uncollectable); + "uncollectable", uncollectable, + "candidates", candidates, + "duration", duration); if (info == NULL) { PyErr_FormatUnraisable("Exception ignored while " "invoking gc callbacks"); @@ -2204,7 +2210,19 @@ record_deallocation(PyThreadState *tstate) gc->alloc_count--; if (gc->alloc_count <= -LOCAL_ALLOC_COUNT_THRESHOLD) { GCState *gcstate = &tstate->interp->gc; - _Py_atomic_add_int(&gcstate->young.count, (int)gc->alloc_count); + int count = _Py_atomic_load_int_relaxed(&gcstate->young.count); + int new_count; + do { + if (count == 0) { + break; + } + new_count = count + (int)gc->alloc_count; + if (new_count < 0) { + new_count = 0; + } + } while (!_Py_atomic_compare_exchange_int(&gcstate->young.count, + &count, + new_count)); gc->alloc_count = 0; } } @@ -2340,7 +2358,6 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) { Py_ssize_t m = 0; /* # objects collected */ Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ - PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; // gc_collect_main() must not be called before _PyGC_Init @@ -2372,19 +2389,19 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) GC_STAT_ADD(generation, collections, 1); if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "start", generation, 0, 0); + invoke_gc_callback(tstate, "start", generation, 0, 0, 0, 0.0); } if (gcstate->debug & _PyGC_DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); show_stats_each_generations(gcstate); - // ignore error: don't interrupt the GC if reading the clock fails - (void)PyTime_PerfCounterRaw(&t1); } if (PyDTrace_GC_START_ENABLED()) { PyDTrace_GC_START(generation); } + PyTime_t start, stop; + (void)PyTime_PerfCounterRaw(&start); PyInterpreterState *interp = tstate->interp; @@ -2399,13 +2416,13 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) m = state.collected; n = state.uncollectable; + (void)PyTime_PerfCounterRaw(&stop); + double duration = PyTime_AsSecondsDouble(stop - start); + if (gcstate->debug & _PyGC_DEBUG_STATS) { - PyTime_t t2; - (void)PyTime_PerfCounterRaw(&t2); - double d = PyTime_AsSecondsDouble(t2 - t1); PySys_WriteStderr( "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", - n+m, n, d); + n+m, n, duration); } // Clear the current thread's free-list again. 
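The hunks above and below thread the new per-collection statistics through to Python code: invoke_gc_callback now reports "candidates" and "duration" in the info dict alongside the existing keys, and gc_collect_main accumulates both into the per-generation stats. A minimal sketch of observing the new keys from Python, assuming an interpreter built with this change; gc.callbacks, the (phase, info) signature, and the "generation"/"collected"/"uncollectable" keys are the pre-existing API, only "candidates" and "duration" are new here:

    import gc

    def on_gc(phase, info):
        # "candidates" (objects examined this collection) and "duration"
        # (elapsed seconds as a float) are the keys added by this change;
        # the remaining keys predate it.
        if phase == "stop":
            print(f"gen {info['generation']}: "
                  f"{info['collected']} collected, "
                  f"{info['uncollectable']} uncollectable, "
                  f"{info['candidates']} candidates in "
                  f"{info['duration']:.4f}s")

    gc.callbacks.append(on_gc)
    gc.collect()  # fires a "start" and a "stop" callback

On "start" callbacks the counters are still zero (the free-threaded path above passes 0, 0, 0, 0.0 explicitly), so meaningful values should only be read in the "stop" phase.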
@@ -2426,6 +2443,8 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) stats->collections++; stats->collected += m; stats->uncollectable += n; + stats->duration += duration; + stats->candidates += state.candidates; GC_STAT_ADD(generation, objects_collected, m); #ifdef Py_STATS @@ -2444,7 +2463,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } if (reason != _Py_GC_REASON_SHUTDOWN) { - invoke_gc_callback(tstate, "stop", generation, m, n); + invoke_gc_callback(tstate, "stop", generation, m, n, state.candidates, duration); } assert(!_PyErr_Occurred(tstate)); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b83b7c528e9..68d73cccec4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -76,7 +76,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); } DISPATCH(); } @@ -135,7 +135,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -195,7 +195,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -255,7 +255,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -314,12 +314,12 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -386,7 +386,7 @@ PyObject *temp = PyStackRef_AsPyObjectSteal(*target_local); PyObject *right_o = PyStackRef_AsPyObjectSteal(right); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyUnicode_Append(&temp, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -460,7 +460,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -520,7 +520,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -578,7 +578,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (rc <= 0) { JUMP_TO_LABEL(error); } @@ -586,7 +586,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -661,7 +661,7 @@ assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -759,7 +759,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); 
} DISPATCH(); } @@ -824,7 +824,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -832,7 +832,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -902,7 +902,7 @@ PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -910,7 +910,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -976,7 +976,7 @@ PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); res = PyStackRef_FromPyObjectNew(res_o); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef tmp = tuple_st; tuple_st = res; @@ -1041,7 +1041,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1101,7 +1101,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1138,7 +1138,7 @@ } else { stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); res_o = PyObject_GetItem(PyStackRef_AsPyObjectBorrow(container), slice); Py_DECREF(slice); @@ -1146,7 +1146,7 @@ stack_pointer += 2; } stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(container); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1157,7 +1157,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1191,7 +1191,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (oparg & 1) { stack_pointer += -(oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(format[0]); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1200,12 +1200,12 @@ stack_pointer += -(oparg & 1); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(str); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1215,7 +1215,7 @@ interpolation = PyStackRef_FromPyObjectSteal(interpolation_o); stack_pointer[0] = interpolation; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1239,7 +1239,7 @@ list = PyStackRef_FromPyObjectStealMortal(list_o); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1254,44 +1254,18 @@ _PyStackRef *values; _PyStackRef map; values = &stack_pointer[-oparg*2]; - STACKREFS_TO_PYOBJECTS(values, oparg*2, values_o); - if (CONVERSION_FAILED(values_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg*2; --_i >= 0;) { - tmp = values[_i]; - values[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg*2; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *map_o = _PyDict_FromItems( - values_o, 2, - values_o+1, 2, - oparg); + PyObject *map_o = _Py_BuildMap_StackRefSteal(values, oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(values_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg*2; --_i >= 0;) { - tmp = values[_i]; - values[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg*2; - assert(WITHIN_STACK_BOUNDS()); if (map_o == NULL) { + stack_pointer += -oparg*2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } map = PyStackRef_FromPyObjectStealMortal(map_o); - stack_pointer[0] = map; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-oparg*2] = map; + stack_pointer += 1 - oparg*2; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1319,7 +1293,7 @@ } stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } int err = 0; @@ -1339,7 +1313,7 @@ } if (err) { stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); Py_DECREF(set_o); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1348,7 +1322,7 @@ set = PyStackRef_FromPyObjectStealMortal(set_o); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1376,14 +1350,14 @@ } stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (slice_o == NULL) { JUMP_TO_LABEL(error); } slice = PyStackRef_FromPyObjectStealMortal(slice_o); stack_pointer[0] = slice; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1398,39 +1372,18 @@ _PyStackRef *pieces; _PyStackRef str; pieces = &stack_pointer[-oparg]; - STACKREFS_TO_PYOBJECTS(pieces, oparg, pieces_o); - if (CONVERSION_FAILED(pieces_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = pieces[_i]; - pieces[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } - PyObject *str_o = _PyUnicode_JoinArray(&_Py_STR(empty), pieces_o, oparg); - STACKREFS_TO_PYOBJECTS_CLEANUP(pieces_o); _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = pieces[_i]; - pieces[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } + PyObject *str_o = _Py_BuildString_StackRefSteal(pieces, 
oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); if (str_o == NULL) { + stack_pointer += -oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } str = PyStackRef_FromPyObjectSteal(str_o); - stack_pointer[0] = str; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-oparg] = str; + stack_pointer += 1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1453,12 +1406,12 @@ PyObject *template_o = _PyTemplate_Build(strings_o, interpolations_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(interpolations); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(strings); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1468,7 +1421,7 @@ template = PyStackRef_FromPyObjectSteal(template_o); stack_pointer[0] = template; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1490,7 +1443,7 @@ tup = PyStackRef_FromPyObjectStealMortal(tup_o); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -1583,7 +1536,7 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (new_frame == NULL) { JUMP_TO_LABEL(error); } @@ -1611,7 +1564,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } stack_pointer[-2 - oparg] = callable; @@ -1664,7 +1617,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -1674,7 +1627,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -1778,7 +1731,7 @@ tstate, init, NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FrameClearAndPop(tstate, shim); @@ -1936,7 +1889,7 @@ assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2046,7 +1999,7 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { JUMP_TO_LABEL(error); } @@ -2118,60 
+2071,24 @@ JUMP_TO_PREDICTED(CALL); } STAT_INC(CALL, hit); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = tp->tp_vectorcall((PyObject *)tp, args_o, total_args, NULL); + PyObject *res_o = _Py_CallBuiltinClass_StackRefSteal( + callable, + arguments, + total_args); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2204,13 +2121,13 @@ args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); if (!PyCFunction_CheckExact(callable_o)) { UPDATE_MISS_STATS(CALL); assert(_PyOpcode_Deopt[opcode] == (CALL)); @@ -2222,65 +2139,25 @@ JUMP_TO_PREDICTED(CALL); } STAT_INC(CALL, hit); - PyCFunction cfunc = PyCFunction_GET_FUNCTION(callable_o); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = _PyCFunctionFast_CAST(cfunc)( - PyCFunction_GET_SELF(callable_o), - args_o, - total_args); + PyObject 
*res_o = _Py_BuiltinCallFast_StackRefSteal( + callable, + arguments, + total_args + ); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2313,13 +2190,13 @@ args = &stack_pointer[-oparg]; self_or_null = stack_pointer[-1 - oparg]; callable = stack_pointer[-2 - oparg]; - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); if (!PyCFunction_CheckExact(callable_o)) { UPDATE_MISS_STATS(CALL); assert(_PyOpcode_Deopt[opcode] == (CALL)); @@ -2332,64 +2209,20 @@ } STAT_INC(CALL, hit); _PyFrame_SetStackPointer(frame, stack_pointer); - PyCFunctionFastWithKeywords cfunc = - _PyCFunctionFastWithKeywords_CAST(PyCFunction_GET_FUNCTION(callable_o)); + PyObject *res_o = _Py_BuiltinCallFastWithKeywords_StackRefSteal(callable, arguments, total_args); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } - _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = cfunc(PyCFunction_GET_SELF(callable_o), args_o, total_args, NULL); - stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = 
_PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2460,7 +2293,7 @@ PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2473,7 +2306,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2592,14 +2425,14 @@ int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(func)); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex( tstate, func_st, locals, nargs, callargs, kwargs, frame); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (new_frame == NULL) { JUMP_TO_LABEL(error); } @@ -2617,17 +2450,17 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(kwargs_st); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callargs_st); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(func_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2640,7 +2473,7 @@ { stack_pointer[0] = result; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2667,7 +2500,7 @@ PyObject *res_o = _PyIntrinsics_UnaryFunctions[oparg].func(tstate, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2677,7 +2510,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); 
DISPATCH(); } @@ -2709,14 +2542,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -2773,17 +2606,17 @@ } (void)null; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(cls); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(instance); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2792,7 +2625,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -2876,7 +2709,7 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2911,7 +2744,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } stack_pointer[-3 - oparg] = callable; @@ -2966,7 +2799,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -2974,7 +2807,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -3072,12 +2905,12 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { JUMP_TO_LABEL(error); } @@ -3150,81 +2983,31 @@ #if TIER_ONE assert(opcode != INSTRUMENTED_CALL); #endif - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = kwnames; - kwnames = PyStackRef_NULL; - stack_pointer[-1] = kwnames; - PyStackRef_CLOSE(tmp); - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-2 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = 
callable; - callable = PyStackRef_NULL; - stack_pointer[-3 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } - PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); - int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PyObject_Vectorcall( - callable_o, args_o, - positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, - kwnames_o); + PyObject *res_o = _Py_VectorCall_StackRefSteal( + callable, + arguments, + total_args, + kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_CLOSE(kwnames); - stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -3 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-3 - oparg] = res; + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3309,12 +3092,12 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { JUMP_TO_LABEL(error); } @@ -3400,12 +3183,12 @@ JUMP_TO_LABEL(error); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3413,7 +3196,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -3479,12 +3262,12 @@ int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); UNLOCK_OBJECT(self_o); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(self); stack_pointer = _PyFrame_GetStackPointer(frame); 
stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3554,63 +3337,27 @@ JUMP_TO_PREDICTED(CALL); } STAT_INC(CALL, hit); - int nargs = total_args - 1; - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyCFunctionFast cfunc = _PyCFunctionFast_CAST(meth->ml_meth); - PyObject *res_o = cfunc(self, (args_o + 1), nargs); + PyObject *res_o = _PyCallMethodDescriptorFast_StackRefSteal( + callable, + meth, + self, + arguments, + total_args + ); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3676,64 +3423,27 @@ JUMP_TO_PREDICTED(CALL); } STAT_INC(CALL, hit); - int nargs = total_args - 1; - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyCFunctionFastWithKeywords cfunc = - _PyCFunctionFastWithKeywords_CAST(meth->ml_meth); - PyObject *res_o = cfunc(self, (args_o + 1), nargs, NULL); + PyObject *res_o = _PyCallMethodDescriptorFastWithKeywords_StackRefSteal( + callable, + meth, + self, 
+ arguments, + total_args + ); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3813,7 +3523,7 @@ PyStackRef_CLOSE(self_stackref); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3826,7 +3536,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3922,7 +3632,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -3932,7 +3642,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -3983,71 +3693,31 @@ #if TIER_ONE assert(opcode != INSTRUMENTED_CALL); #endif - PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); int total_args = oparg; _PyStackRef *arguments = args; if (!PyStackRef_IsNull(self_or_null)) { arguments--; total_args++; } - STACKREFS_TO_PYOBJECTS(arguments, total_args, args_o); - if (CONVERSION_FAILED(args_o)) { - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); - JUMP_TO_LABEL(error); - } _PyFrame_SetStackPointer(frame, stack_pointer); - PyObject *res_o = PyObject_Vectorcall( - callable_o, args_o, - total_args | PY_VECTORCALL_ARGUMENTS_OFFSET, - NULL); + PyObject *res_o = _Py_VectorCall_StackRefSteal( + 
callable, + arguments, + total_args, + PyStackRef_NULL); stack_pointer = _PyFrame_GetStackPointer(frame); - STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); - assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp; - for (int _i = oparg; --_i >= 0;) { - tmp = args[_i]; - args[_i] = PyStackRef_NULL; - PyStackRef_CLOSE(tmp); - } - tmp = self_or_null; - self_or_null = PyStackRef_NULL; - stack_pointer[-1 - oparg] = self_or_null; - PyStackRef_XCLOSE(tmp); - tmp = callable; - callable = PyStackRef_NULL; - stack_pointer[-2 - oparg] = callable; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { + stack_pointer += -2 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); } // _CHECK_PERIODIC_AT_END { - stack_pointer[0] = res; - stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + stack_pointer[-2 - oparg] = res; + stack_pointer += -1 - oparg; + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4158,7 +3828,7 @@ assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -4240,7 +3910,7 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (temp == NULL) { JUMP_TO_LABEL(error); } @@ -4319,7 +3989,7 @@ (void)callable; (void)null; stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4332,7 +4002,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4391,7 +4061,7 @@ (void)callable; (void)null; stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4404,7 +4074,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4462,7 +4132,7 @@ res = PyStackRef_FromPyObjectNew(Py_TYPE(arg_o)); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(arg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -4501,7 +4171,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } 
PyObject *match_o = NULL; @@ -4519,7 +4189,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_LABEL(error); } @@ -4537,7 +4207,7 @@ stack_pointer[0] = rest; stack_pointer[1] = match; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4567,14 +4237,14 @@ int res = PyErr_GivenExceptionMatches(left_o, right_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(right); stack_pointer = _PyFrame_GetStackPointer(frame); b = res ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4621,7 +4291,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); none = PyStackRef_None; } else { @@ -4633,7 +4303,7 @@ stack_pointer[0] = none; stack_pointer[1] = value; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4686,7 +4356,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -4706,7 +4376,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4761,7 +4431,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4820,7 +4490,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4879,7 +4549,7 @@ } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4931,7 +4601,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_LABEL(error); } @@ -4939,7 +4609,7 @@ } stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -4989,7 +4659,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_LABEL(error); } @@ -4997,7 +4667,7 @@ } stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5047,7 +4717,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res < 0) { JUMP_TO_LABEL(error); } @@ -5055,7 +4725,7 @@ } stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5077,7 +4747,7 @@ PyObject 
*result_o = conv_fn(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5087,7 +4757,7 @@ result = PyStackRef_FromPyObjectSteal(result_o); stack_pointer[0] = result; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5105,7 +4775,7 @@ top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5145,7 +4815,7 @@ int err = PyObject_DelAttr(PyStackRef_AsPyObjectBorrow(owner), name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5285,7 +4955,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_LABEL(error); } @@ -5317,14 +4987,14 @@ _PyEval_FormatKwargsError(tstate, callable_o, update_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5360,14 +5030,14 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); JUMP_TO_LABEL(error); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(update); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5407,7 +5077,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); } else { Py_INCREF(exc); @@ -5429,7 +5099,7 @@ _PyStackRef value; value = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5452,7 +5122,7 @@ val = value; stack_pointer[-2] = val; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(receiver); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5518,7 +5188,7 @@ JUMP_TO_LABEL(error); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5555,7 +5225,7 @@ PyObject *res_o = PyObject_Format(value_o, NULL); 
stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5570,7 +5240,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5599,14 +5269,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5660,7 +5330,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5815,7 +5485,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5882,7 +5552,7 @@ } stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5938,7 +5608,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -5968,7 +5638,7 @@ type->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(obj); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5978,7 +5648,7 @@ iter_o = (*getter)(obj_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(obj); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -5999,7 +5669,7 @@ iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[0] = iter; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6023,7 +5693,7 @@ awaitable = PyStackRef_FromPyObjectSteal(awaitable_o); stack_pointer[0] = awaitable; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6042,7 +5712,7 @@ PyObject *iter_o = _PyEval_GetAwaitable(PyStackRef_AsPyObjectBorrow(iterable), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6052,7 +5722,7 @@ iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[0] = iter; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6084,7 +5754,7 @@ PyObject *iter_o = PyObject_GetIter(PyStackRef_AsPyObjectBorrow(iterable)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); 
_PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6098,7 +5768,7 @@ stack_pointer[-1] = iter; stack_pointer[0] = index_or_null; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6126,7 +5796,7 @@ len = PyStackRef_FromPyObjectSteal(len_o); stack_pointer[0] = len; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6196,7 +5866,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6228,14 +5898,14 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6329,7 +5999,7 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (new_frame == NULL) { JUMP_TO_LABEL(error); } @@ -6355,7 +6025,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } _PyFrame_SetStackPointer(frame, stack_pointer); @@ -6406,7 +6076,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -6416,7 +6086,7 @@ { stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6535,14 +6205,14 @@ int code_flags = ((PyCodeObject *)PyFunction_GET_CODE(func))->co_flags; PyObject *locals = code_flags & CO_OPTIMIZED ? 
NULL : Py_NewRef(PyFunction_GET_GLOBALS(func)); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *new_frame = _PyEvalFramePushAndInit_Ex( tstate, func_st, locals, nargs, callargs, kwargs, frame); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (new_frame == NULL) { JUMP_TO_LABEL(error); } @@ -6560,17 +6230,17 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(kwargs_st); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(callargs_st); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(func_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6583,7 +6253,7 @@ { stack_pointer[0] = result; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = check_periodics(tstate); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6682,7 +6352,7 @@ ); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(kwnames); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6715,7 +6385,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } _PyFrame_SetStackPointer(frame, stack_pointer); @@ -6768,7 +6438,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (res_o == NULL) { JUMP_TO_LABEL(error); } @@ -6776,7 +6446,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -6820,7 +6490,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); } else { Py_INCREF(exc); @@ -6855,7 +6525,7 @@ } } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6889,7 +6559,7 @@ val = value; stack_pointer[-2] = val; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(receiver); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -6928,7 +6598,7 @@ stack_pointer[-1] = null_or_index; stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7088,7 
+6758,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } } @@ -7133,7 +6803,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (super == NULL) { JUMP_TO_LABEL(error); } @@ -7156,7 +6826,7 @@ } stack_pointer[0] = attr; stack_pointer += 1 + (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7194,7 +6864,7 @@ (void)index_or_null; INSTRUMENTED_JUMP(prev_instr, this_instr+1, PY_MONITORING_EVENT_BRANCH_RIGHT); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iter); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7221,7 +6891,7 @@ INSTRUMENTED_JUMP(this_instr, next_instr + oparg, PY_MONITORING_EVENT_BRANCH_RIGHT); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7245,14 +6915,14 @@ } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += 1; } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7273,7 +6943,7 @@ RECORD_BRANCH_TAKEN(this_instr[1].cache, jump); if (jump) { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7305,7 +6975,7 @@ INSTRUMENTED_JUMP(this_instr, next_instr + oparg, PY_MONITORING_EVENT_BRANCH_RIGHT); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7420,7 +7090,7 @@ assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(STACK_LEVEL() == 0); _Py_LeaveRecursiveCallPy(tstate); @@ -7434,7 +7104,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7478,7 +7148,7 @@ gen->gi_frame_state = FRAME_SUSPENDED + oparg; _PyStackRef temp = retval; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); tstate->exc_info = gen->gi_exc_state.previous_item; gen->gi_exc_state.previous_item = NULL; @@ -7502,7 +7172,7 @@ } stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7530,7 +7200,7 @@ if (!PyStackRef_IsNull(executor)) { tstate->current_executor = PyStackRef_AsPyObjectBorrow(executor); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(executor); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7566,11 +7236,11 @@ 
PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); b = res ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = b; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7738,7 +7408,7 @@ JUMP_TO_LABEL(pop_1_error); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7774,7 +7444,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7782,7 +7452,7 @@ } assert(Py_IsNone(none_val)); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7838,7 +7508,7 @@ } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7854,7 +7524,7 @@ PyObject *attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -7866,7 +7536,7 @@ } } stack_pointer += (oparg&1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7924,7 +7594,7 @@ } } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -7992,7 +7662,7 @@ } } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8048,7 +7718,7 @@ tstate, PyStackRef_FromPyObjectNew(f), 2, frame); new_frame->localsplus[0] = owner; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); new_frame->localsplus[1] = PyStackRef_FromPyObjectNew(name); frame->return_offset = 10u ; DISPATCH_INLINED(new_frame); @@ -8130,7 +7800,7 @@ } } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8186,7 +7856,7 @@ stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8232,7 +7902,7 @@ stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8299,7 +7969,7 @@ stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8373,7 +8043,7 @@ } } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8412,7 +8082,7 @@ STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -8420,7 +8090,7 @@ } stack_pointer[0] = attr; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8480,7 +8150,7 @@ STAT_INC(LOAD_ATTR, hit); assert(descr != NULL); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -8488,7 +8158,7 @@ } stack_pointer[0] = attr; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8573,7 +8243,7 @@ assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -8651,7 +8321,7 @@ } } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8761,7 +8431,7 @@ } } stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8791,7 +8461,7 @@ bc = PyStackRef_FromPyObjectSteal(bc_o); stack_pointer[0] = bc; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8808,7 +8478,7 @@ value = PyStackRef_FromPyObjectNew(tstate->interp->common_consts[oparg]); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8825,7 +8495,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8845,7 +8515,7 @@ if (PyStackRef_IsNull(value)) { stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -8853,7 +8523,7 @@ } stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8870,7 +8540,7 @@ value = PyStackRef_DUP(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8887,7 +8557,7 @@ GETLOCAL(oparg) = PyStackRef_NULL; stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8904,7 +8574,7 @@ value = PyStackRef_Borrow(GETLOCAL(oparg)); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8925,7 +8595,7 @@ stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8951,7 +8621,7 @@ value = PyStackRef_DUP(value_s); stack_pointer[0] = value; stack_pointer += 
1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -8972,7 +8642,7 @@ stack_pointer[0] = value1; stack_pointer[1] = value2; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9010,14 +8680,14 @@ } } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(class_dict_st); stack_pointer = _PyFrame_GetStackPointer(frame); value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9038,7 +8708,7 @@ int err = PyMapping_GetOptionalItem(PyStackRef_AsPyObjectBorrow(mod_or_class_dict), name, &v_o); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(mod_or_class_dict); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -9092,7 +8762,7 @@ v = PyStackRef_FromPyObjectSteal(v_o); stack_pointer[0] = v; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9148,7 +8818,7 @@ } } stack_pointer += 1 + (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9228,7 +8898,7 @@ } stack_pointer[0] = res; stack_pointer += 1 + (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9295,7 +8965,7 @@ } stack_pointer[0] = res; stack_pointer += 1 + (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9319,7 +8989,7 @@ locals = PyStackRef_FromPyObjectNew(l); stack_pointer[0] = locals; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9342,7 +9012,7 @@ v = PyStackRef_FromPyObjectSteal(v_o); stack_pointer[0] = v; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9360,7 +9030,7 @@ value = PyStackRef_FromPyObjectBorrow(obj); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9386,7 +9056,7 @@ method_and_self = &stack_pointer[-1]; PyObject *name = _Py_SpecialMethods[oparg].name; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = _PyObject_LookupSpecialMethod(name, method_and_self); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -9475,7 +9145,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } } @@ -9520,7 +9190,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (super == NULL) { JUMP_TO_LABEL(error); } @@ -9543,7 +9213,7 @@ } stack_pointer[0] = attr; stack_pointer += 1 + (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9598,14 +9268,14 @@ PyStackRef_CLOSE(tmp); 
stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (attr == NULL) { JUMP_TO_LABEL(error); } attr_st = PyStackRef_FromPyObjectSteal(attr); stack_pointer[0] = attr_st; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9658,7 +9328,7 @@ self_or_null = self_st; } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(self_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -9666,7 +9336,7 @@ stack_pointer += 1; } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyStackRef tmp = global_super_st; global_super_st = self_or_null; @@ -9678,12 +9348,12 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); attr = PyStackRef_FromPyObjectSteal(attr_o); stack_pointer[0] = attr; stack_pointer[1] = self_or_null; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9725,7 +9395,7 @@ PyFunction_New(codeobj, GLOBALS()); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(codeobj_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -9737,7 +9407,7 @@ func = PyStackRef_FromPyObjectSteal((PyObject *)func_obj); stack_pointer[0] = func; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9768,7 +9438,7 @@ JUMP_TO_LABEL(pop_2_error); } stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9807,7 +9477,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (attrs_o) { assert(PyTuple_CheckExact(attrs_o)); attrs = PyStackRef_FromPyObjectSteal(attrs_o); @@ -9820,7 +9490,7 @@ } stack_pointer[0] = attrs; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9847,7 +9517,7 @@ values_or_none = PyStackRef_FromPyObjectSteal(values_or_none_o); stack_pointer[0] = values_or_none; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9866,7 +9536,7 @@ res = match ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9885,7 +9555,7 @@ res = match ? PyStackRef_True : PyStackRef_False; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9928,7 +9598,7 @@ ? 
NULL : PyStackRef_AsPyObjectSteal(exc_value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -9946,7 +9616,7 @@ iter = stack_pointer[-2]; (void)index_or_null; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iter); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -9971,7 +9641,7 @@ RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(flag ? oparg : next_instr->op.code == NOT_TAKEN); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10014,7 +9684,7 @@ JUMPBY(flag ? oparg : next_instr->op.code == NOT_TAKEN); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10057,7 +9727,7 @@ JUMPBY(flag ? oparg : next_instr->op.code == NOT_TAKEN); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10079,7 +9749,7 @@ RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(flag ? oparg : next_instr->op.code == NOT_TAKEN); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10094,7 +9764,7 @@ _PyStackRef value; value = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -10126,7 +9796,7 @@ stack_pointer[-1] = prev_exc; stack_pointer[0] = new_exc; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10142,7 +9812,7 @@ res = PyStackRef_NULL; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10162,7 +9832,7 @@ PyObject *cause = oparg == 2 ? PyStackRef_AsPyObjectSteal(args[1]) : NULL; PyObject *exc = oparg > 0 ? 
PyStackRef_AsPyObjectSteal(args[0]) : NULL; stack_pointer += -oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int err = do_raise(tstate, exc, cause); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -10196,7 +9866,7 @@ } assert(exc && PyExceptionInstance_Check(exc)); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); _PyErr_SetRaisedException(tstate, exc); monitor_reraise(tstate, frame, this_instr); @@ -10365,7 +10035,7 @@ LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10383,7 +10053,7 @@ assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); _PyStackRef temp = PyStackRef_MakeHeapSafe(retval); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(STACK_LEVEL() == 0); _Py_LeaveRecursiveCallPy(tstate); @@ -10396,7 +10066,7 @@ LLTRACE_RESUME_FRAME(); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10445,7 +10115,7 @@ _PyInterpreterFrame *gen_frame = &gen->gi_iframe; _PyFrame_StackPush(gen_frame, PyStackRef_MakeHeapSafe(v)); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; @@ -10485,7 +10155,7 @@ } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -10493,7 +10163,7 @@ } } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -10501,7 +10171,7 @@ } stack_pointer[0] = retval; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10561,7 +10231,7 @@ assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = PyStackRef_Unwrap(new_frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); assert(temp->previous == frame || temp->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -10640,7 +10310,7 @@ JUMP_TO_LABEL(pop_1_error); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10668,7 +10338,7 @@ *ptr = attr; stack_pointer[-2] = func_out; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -10689,7 +10359,7 @@ PyStackRef_AsPyObjectBorrow(iterable)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(iterable); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -10748,7 +10418,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += 
-2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_LABEL(error); } @@ -10823,7 +10493,7 @@ } UNLOCK_OBJECT(owner_o); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); Py_XDECREF(old_value); @@ -10874,7 +10544,7 @@ FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value)); UNLOCK_OBJECT(owner_o); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); Py_XDECREF(old_value); @@ -10962,7 +10632,7 @@ UNLOCK_OBJECT(dict); STAT_INC(STORE_ATTR, hit); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(owner); Py_XDECREF(old_value); @@ -10986,7 +10656,7 @@ PyCell_SetTakeRef(cell, PyStackRef_AsPyObjectSteal(v)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11003,7 +10673,7 @@ _PyStackRef tmp = GETLOCAL(oparg); GETLOCAL(oparg) = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11050,14 +10720,14 @@ _PyStackRef tmp = GETLOCAL(oparg1); GETLOCAL(oparg1) = value1; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); tmp = GETLOCAL(oparg2); GETLOCAL(oparg2) = value2; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_XCLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11079,7 +10749,7 @@ int err = PyDict_SetItem(GLOBALS(), name, PyStackRef_AsPyObjectBorrow(v)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11108,7 +10778,7 @@ "no locals found when storing %R", name); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11125,7 +10795,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(v); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11169,7 +10839,7 @@ } else { stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); err = PyObject_SetItem(PyStackRef_AsPyObjectBorrow(container), slice, PyStackRef_AsPyObjectBorrow(v)); Py_DECREF(slice); @@ -11187,7 +10857,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -4; - 
assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_LABEL(error); } @@ -11246,7 +10916,7 @@ PyStackRef_CLOSE(tmp); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (err) { JUMP_TO_LABEL(error); } @@ -11294,7 +10964,7 @@ PyStackRef_AsPyObjectSteal(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(dict_st); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11377,7 +11047,7 @@ UNLOCK_OBJECT(list); PyStackRef_CLOSE_SPECIALIZED(sub_st, _PyLong_ExactDealloc); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(list_st); Py_DECREF(old_value); @@ -11443,7 +11113,7 @@ int err = PyObject_IsTrue(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11454,7 +11124,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11489,7 +11159,7 @@ { value = owner; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11497,7 +11167,7 @@ } stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11554,7 +11224,7 @@ } else { stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11671,7 +11341,7 @@ else { assert(Py_SIZE(value_o)); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11714,7 +11384,7 @@ if (err < 0) { JUMP_TO_LABEL(error); } - DISPATCH_GOTO_NON_TRACING(); + DISPATCH(); } _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; if ((_tstate->jit_tracer_state.prev_state.instr->op.code == CALL_LIST_APPEND && @@ -11760,7 +11430,7 @@ PyObject *res_o = PyNumber_Invert(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11770,7 +11440,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11789,7 +11459,7 @@ PyObject *res_o = PyNumber_Negative(PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -11799,7 +11469,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11835,7 +11505,7 @@ top = &stack_pointer[(oparg & 0xFF) + (oparg >> 8)]; PyObject *seq_o = PyStackRef_AsPyObjectSteal(seq); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int res = _PyEval_UnpackIterableStackRef(tstate, seq_o, oparg & 0xFF, oparg >> 8, top); Py_DECREF(seq_o); @@ -11844,7 +11514,7 @@ JUMP_TO_LABEL(error); } stack_pointer += 1 + (oparg & 0xFF) + (oparg >> 8); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11885,7 +11555,7 @@ top = &stack_pointer[-1 + oparg]; PyObject *seq_o = PyStackRef_AsPyObjectSteal(seq); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); int res = _PyEval_UnpackIterableStackRef(tstate, seq_o, oparg, -1, top); Py_DECREF(seq_o); @@ -11895,7 +11565,7 @@ } } stack_pointer += oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -11950,7 +11620,7 @@ } UNLOCK_OBJECT(seq_o); stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(seq); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -12000,7 +11670,7 @@ *values++ = PyStackRef_FromPyObjectNew(items[i]); } stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(seq); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -12051,7 +11721,7 @@ stack_pointer[-1] = val1; stack_pointer[0] = val0; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(seq); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -12100,7 +11770,7 @@ res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -12123,7 +11793,7 @@ gen->gi_frame_state = FRAME_SUSPENDED + oparg; _PyStackRef temp = retval; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); tstate->exc_info = gen->gi_exc_state.previous_item; gen->gi_exc_state.previous_item = NULL; @@ -12146,7 +11816,7 @@ LLTRACE_RESUME_FRAME(); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } diff --git a/Python/import.c b/Python/import.c index 9ab2d3b3552..e91c95b40d9 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_audit.h" // _PySys_Audit() #include "pycore_ceval.h" +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() #include "pycore_hashtable.h" // _Py_hashtable_new_full() #include "pycore_import.h" // _PyImport_BootstrapImp() #include "pycore_initconfig.h" // _PyStatus_OK() 
@@ -309,13 +310,8 @@ PyImport_GetModule(PyObject *name) if not, create a new one and insert it in the modules dictionary. */ static PyObject * -import_add_module(PyThreadState *tstate, PyObject *name) +import_add_module_lock_held(PyObject *modules, PyObject *name) { - PyObject *modules = get_modules_dict(tstate, false); - if (modules == NULL) { - return NULL; - } - PyObject *m; if (PyMapping_GetOptionalItem(modules, name, &m) < 0) { return NULL; @@ -335,6 +331,21 @@ import_add_module(PyThreadState *tstate, PyObject *name) return m; } +static PyObject * +import_add_module(PyThreadState *tstate, PyObject *name) +{ + PyObject *modules = get_modules_dict(tstate, false); + if (modules == NULL) { + return NULL; + } + + PyObject *m; + Py_BEGIN_CRITICAL_SECTION(modules); + m = import_add_module_lock_held(modules, name); + Py_END_CRITICAL_SECTION(); + return m; +} + PyObject * PyImport_AddModuleRef(const char *name) { @@ -1550,25 +1561,11 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name) if (!PyModule_Check(module) || ((PyModuleObject *)module)->md_requires_gil) { - if (_PyEval_EnableGILPermanent(tstate)) { - int warn_result = PyErr_WarnFormat( - PyExc_RuntimeWarning, - 1, - "The global interpreter lock (GIL) has been enabled to load " - "module '%U', which has not declared that it can run safely " - "without the GIL. To override this behavior and keep the GIL " - "disabled (at your own risk), run with PYTHON_GIL=0 or -Xgil=0.", - module_name - ); - if (warn_result < 0) { - return warn_result; - } + if (PyModule_Check(module)) { + assert(((PyModuleObject *)module)->md_token_is_def); } - - const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); - if (config->enable_gil == _PyConfig_GIL_DEFAULT && config->verbose) { - PySys_FormatStderr("# loading module '%U', which requires the GIL\n", - module_name); + if (_PyImport_EnableGILAndWarn(tstate, module_name) < 0) { + return -1; } } else { @@ -1577,6 +1574,28 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name) return 0; } + +int +_PyImport_EnableGILAndWarn(PyThreadState *tstate, PyObject *module_name) +{ + if (_PyEval_EnableGILPermanent(tstate)) { + return PyErr_WarnFormat( + PyExc_RuntimeWarning, + 1, + "The global interpreter lock (GIL) has been enabled to load " + "module '%U', which has not declared that it can run safely " + "without the GIL. To override this behavior and keep the GIL " + "disabled (at your own risk), run with PYTHON_GIL=0 or -Xgil=0.", + module_name + ); + } + const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); + if (config->enable_gil == _PyConfig_GIL_DEFAULT && config->verbose) { + PySys_FormatStderr("# loading module '%U', which requires the GIL\n", + module_name); + } + return 0; +} #endif static PyThreadState * @@ -4785,6 +4804,8 @@ _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) _PyImport_GetModuleExportHooks(&info, fp, &p0, &ex0); if (ex0) { mod = import_run_modexport(tstate, ex0, &info, spec); + // Modules created from slots handle GIL enablement (Py_mod_gil slot) + // when they're created. 
goto cleanup; } if (p0 == NULL) { diff --git a/Python/importdl.c b/Python/importdl.c index 61a9cdaf375..537e8d869dc 100644 --- a/Python/importdl.c +++ b/Python/importdl.c @@ -156,7 +156,6 @@ _Py_ext_module_loader_info_init_for_builtin( PyObject *name) { assert(PyUnicode_Check(name)); - assert(PyUnicode_FindChar(name, '.', 0, PyUnicode_GetLength(name), -1) == -1); assert(PyUnicode_GetLength(name) > 0); PyObject *name_encoded = PyUnicode_AsEncodedString(name, "ascii", NULL); diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 72b7433022f..9e750433cff 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -2021,12 +2021,12 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) if (existing_events == events) { return 0; } - set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); return -1; } + set_events(&interp->monitors, tool_id, events); set_global_version(tstate, new_version); #ifdef _Py_TIER2 _Py_Executors_InvalidateAll(interp, 1); diff --git a/Python/jit.c b/Python/jit.c index 7ab0f8ddd43..47d3d7a5d27 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -60,6 +60,10 @@ jit_error(const char *message) static unsigned char * jit_alloc(size_t size) { + if (size > PY_MAX_JIT_CODE_SIZE) { + jit_error("code too big; refactor bytecodes.c to keep uop size down, or reduce maximum trace length."); + return NULL; + } assert(size); assert(size % get_page_size() == 0); #ifdef MS_WINDOWS diff --git a/Python/marshal.c b/Python/marshal.c index 8b56de65755..69d6dd7cf0f 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -310,7 +310,7 @@ w_PyLong(const PyLongObject *ob, char flag, WFILE *p) } if (!long_export.digits) { int8_t sign = long_export.value < 0 ? 
-1 : 1; - uint64_t abs_value = Py_ABS(long_export.value); + uint64_t abs_value = _Py_ABS_CAST(uint64_t, long_export.value); uint64_t d = abs_value; long l = 0; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8d7b734e17c..51722556554 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -144,10 +144,6 @@ incorrect_keys(PyObject *obj, uint32_t version) #define CURRENT_FRAME_IS_INIT_SHIM() (ctx->frame->code == ((PyCodeObject *)&_Py_InitCleanup)) -#define WITHIN_STACK_BOUNDS() \ - (CURRENT_FRAME_IS_INIT_SHIM() || (STACK_LEVEL() >= 0 && STACK_LEVEL() <= STACK_SIZE())) - - #define GETLOCAL(idx) ((ctx->frame->locals[idx])) #define REPLACE_OP(INST, OP, ARG, OPERAND) \ @@ -192,6 +188,27 @@ incorrect_keys(PyObject *obj, uint32_t version) #define JUMP_TO_LABEL(label) goto label; +static int +check_stack_bounds(JitOptContext *ctx, JitOptRef *stack_pointer, int offset, int opcode) +{ + int stack_level = (int)(stack_pointer + (offset) - ctx->frame->stack); + int should_check = !CURRENT_FRAME_IS_INIT_SHIM() || + (opcode == _RETURN_VALUE) || + (opcode == _RETURN_GENERATOR) || + (opcode == _YIELD_VALUE); + if (should_check && (stack_level < 0 || stack_level > STACK_SIZE())) { + ctx->contradiction = true; + ctx->done = true; + return 1; + } + return 0; +} + +#define CHECK_STACK_BOUNDS(offset) \ + if (check_stack_bounds(ctx, stack_pointer, offset, opcode)) { \ + break; \ + } \ + static int optimize_to_bool( _PyUOpInstruction *this_instr, @@ -278,6 +295,36 @@ get_co_name(JitOptContext *ctx, int index) return PyTuple_GET_ITEM(get_current_code_object(ctx)->co_names, index); } +#ifdef Py_DEBUG +void +_Py_opt_assert_within_stack_bounds( + _Py_UOpsAbstractFrame *frame, JitOptRef *stack_pointer, + const char *filename, int lineno +) { + if (frame->code == ((PyCodeObject *)&_Py_InitCleanup)) { + return; + } + int level = (int)(stack_pointer - frame->stack); + if (level < 0) { + printf("Stack underflow (depth = %d) at %s:%d\n", level, filename, lineno); + fflush(stdout); + abort(); + } + int size = (int)(frame->stack_len); + if (level > size) { + printf("Stack overflow (depth = %d) at %s:%d\n", level, filename, lineno); + fflush(stdout); + abort(); + } +} +#endif + +#ifdef Py_DEBUG +#define ASSERT_WITHIN_STACK_BOUNDS(F, L) _Py_opt_assert_within_stack_bounds(ctx->frame, stack_pointer, (F), (L)) +#else +#define ASSERT_WITHIN_STACK_BOUNDS(F, L) (void)0 +#endif + // TODO (gh-134584) generate most of this table automatically const uint16_t op_without_decref_inputs[MAX_UOP_ID + 1] = { [_BINARY_OP_MULTIPLY_FLOAT] = _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS, diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 9ebd113df2d..85bebed5867 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -33,27 +33,30 @@ if (sym_is_null(value)) { ctx->done = true; } + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LOAD_FAST: { JitOptRef value; value = GETLOCAL(oparg); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LOAD_FAST_BORROW: { JitOptRef value; value = PyJitRef_Borrow(GETLOCAL(oparg)); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -62,9 +65,10 @@ value = GETLOCAL(oparg); JitOptRef 
temp = sym_new_null(ctx); GETLOCAL(oparg) = temp; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -74,9 +78,10 @@ PyObject *val = PyTuple_GET_ITEM(co->co_consts, oparg); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = PyJitRef_Borrow(sym_new_const(ctx, val)); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -87,9 +92,10 @@ assert(_Py_IsImmortal(val)); REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)val); value = PyJitRef_Borrow(sym_new_const(ctx, val)); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -97,8 +103,9 @@ JitOptRef value; value = stack_pointer[-1]; GETLOCAL(oparg) = value; + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -120,68 +127,78 @@ else if (typ == &PyUnicode_Type) { REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); } + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_TOP_NOP: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_TOP_INT: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_TOP_FLOAT: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_TOP_UNICODE: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_TWO: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _PUSH_NULL: { JitOptRef res; res = sym_new_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _END_FOR: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_ITER: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _END_SEND: { JitOptRef val; val = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = val; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -504,15 +521,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_compact_int(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -554,15 +573,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] 
= res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_compact_int(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -604,15 +625,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_compact_int(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -673,18 +696,20 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -725,18 +750,20 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -777,45 +804,50 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_MULTIPLY_FLOAT__NO_DECREF_INPUTS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_ADD_FLOAT__NO_DECREF_INPUTS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_SUBTRACT_FLOAT__NO_DECREF_INPUTS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -856,15 +888,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer 
+= -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyUnicode_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -888,8 +922,9 @@ res = sym_new_type(ctx, &PyUnicode_Type); } GETLOCAL(this_instr->operand0) = res; + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -900,9 +935,10 @@ case _BINARY_OP_EXTEND: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -920,42 +956,47 @@ else { res = sym_new_not_null(ctx); } + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_SLICE: { + CHECK_STACK_BOUNDS(-4); stack_pointer += -4; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_SUBSCR_LIST_INT: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_SUBSCR_LIST_SLICE: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_SUBSCR_STR_INT: { JitOptRef res; res = sym_new_type(ctx, &PyUnicode_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1002,9 +1043,10 @@ else { res = sym_new_not_null(ctx); } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1031,18 +1073,20 @@ case _BINARY_OP_SUBSCR_DICT: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BINARY_OP_SUBSCR_CHECK_FUNC: { JitOptRef getitem; getitem = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = getitem; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1050,45 +1094,52 @@ JitOptRef new_frame; new_frame = PyJitRef_NULL; ctx->done = true; + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = new_frame; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LIST_APPEND: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _SET_ADD: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_SUBSCR: { + CHECK_STACK_BOUNDS(-3); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_SUBSCR_LIST_INT: { + CHECK_STACK_BOUNDS(-3); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_SUBSCR_DICT: { + CHECK_STACK_BOUNDS(-3); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _DELETE_SUBSCR: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1102,9 +1153,10 @@ case _CALL_INTRINSIC_2: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1113,8 +1165,9 @@ JitOptRef res; retval = stack_pointer[-1]; JitOptRef temp = PyJitRef_StripReferenceInfo(retval); + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); ctx->frame->stack_pointer = stack_pointer; PyCodeObject *returning_code = get_code_with_logging(this_instr); if (returning_code == NULL) { @@ -1134,9 +1187,10 @@ } stack_pointer = ctx->frame->stack_pointer; res = temp; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1150,9 +1204,10 @@ case _GET_ANEXT: { JitOptRef awaitable; awaitable = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = awaitable; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1178,8 +1233,9 @@ JitOptRef value; retval = stack_pointer[-1]; JitOptRef temp = PyJitRef_StripReferenceInfo(retval); + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); ctx->frame->stack_pointer = stack_pointer; PyCodeObject *returning_code = get_code_with_logging(this_instr); if (returning_code == NULL) { @@ -1193,39 +1249,44 @@ } stack_pointer = ctx->frame->stack_pointer; value = temp; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_EXCEPT: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LOAD_COMMON_CONSTANT: { JitOptRef value; value = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LOAD_BUILD_CLASS: { JitOptRef bc; bc = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = bc; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_NAME: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1242,8 +1303,9 @@ for (int i = 0; i < oparg; i++) { values[i] = sym_new_unknown(ctx); } + CHECK_STACK_BOUNDS(-1 + oparg); stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1254,10 +1316,11 @@ seq = stack_pointer[-1]; val0 = sym_tuple_getitem(ctx, seq, 0); val1 = sym_tuple_getitem(ctx, seq, 1); + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = val1; stack_pointer[0] = val0; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1269,8 +1332,9 @@ for (int i 
= 0; i < oparg; i++) { values[i] = sym_tuple_getitem(ctx, seq, oparg - i - 1); } + CHECK_STACK_BOUNDS(-1 + oparg); stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1280,8 +1344,9 @@ for (int _i = oparg; --_i >= 0;) { values[_i] = sym_new_not_null(ctx); } + CHECK_STACK_BOUNDS(-1 + oparg); stack_pointer += -1 + oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1295,26 +1360,30 @@ for (int i = 0; i < totalargs; i++) { values[i] = sym_new_unknown(ctx); } + CHECK_STACK_BOUNDS((oparg & 0xFF) + (oparg >> 8)); stack_pointer += (oparg & 0xFF) + (oparg >> 8); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_ATTR: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _DELETE_ATTR: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_GLOBAL: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1325,9 +1394,10 @@ case _LOAD_LOCALS: { JitOptRef locals; locals = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = locals; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1336,9 +1406,10 @@ case _LOAD_NAME: { JitOptRef v; v = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = v; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1346,8 +1417,9 @@ JitOptRef *res; res = &stack_pointer[0]; res[0] = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1361,8 +1433,9 @@ else { REPLACE_OP(this_instr, _NOP, 0, 0); } + CHECK_STACK_BOUNDS((oparg & 1)); stack_pointer += (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1426,9 +1499,10 @@ else { res = sym_new_const(ctx, cnst); } + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1462,9 +1536,10 @@ else { res = sym_new_const(ctx, cnst); } + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1490,15 +1565,17 @@ case _LOAD_DEREF: { JitOptRef value; value = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_DEREF: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1509,27 +1586,30 @@ case _BUILD_STRING: { JitOptRef str; str = sym_new_type(ctx, &PyUnicode_Type); + CHECK_STACK_BOUNDS(1 - oparg); stack_pointer[-oparg] = str; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BUILD_INTERPOLATION: { JitOptRef interpolation; interpolation = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - (oparg & 1)); stack_pointer[-2 - (oparg & 1)] = interpolation; 
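/*
 * [Editor's note] Every hunk in this generated file follows one pattern: a
 * CHECK_STACK_BOUNDS(n) is now emitted just before each `stack_pointer += n`,
 * and the old `assert(WITHIN_STACK_BOUNDS())` after the adjustment becomes
 * ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__). The macro definitions are
 * not part of this excerpt, so the sketch below is only a plausible reading
 * of their contract; `stack_base` and `stack_limit` are hypothetical names:
 *
 *     // Pre-check: the *upcoming* adjustment must land in bounds, so an
 *     // overflow is caught before the pointer actually moves.
 *     #define CHECK_STACK_BOUNDS(n) \
 *         assert(stack_base(ctx) <= stack_pointer + (n) && \
 *                stack_pointer + (n) <= stack_limit(ctx))
 *
 * Threading __FILE__/__LINE__ through the post-check presumably lets the
 * helper behind it report the exact generated line even if the check itself
 * lives in a function rather than expanding inline the way assert() does.
 */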
stack_pointer += -1 - (oparg & 1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BUILD_TEMPLATE: { JitOptRef template; template = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = template; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1538,48 +1618,54 @@ JitOptRef tup; values = &stack_pointer[-oparg]; tup = sym_new_tuple(ctx, oparg, values); + CHECK_STACK_BOUNDS(1 - oparg); stack_pointer[-oparg] = tup; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BUILD_LIST: { JitOptRef list; list = sym_new_type(ctx, &PyList_Type); + CHECK_STACK_BOUNDS(1 - oparg); stack_pointer[-oparg] = list; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LIST_EXTEND: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _SET_UPDATE: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BUILD_SET: { JitOptRef set; set = sym_new_type(ctx, &PySet_Type); + CHECK_STACK_BOUNDS(1 - oparg); stack_pointer[-oparg] = set; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BUILD_MAP: { JitOptRef map; map = sym_new_type(ctx, &PyDict_Type); + CHECK_STACK_BOUNDS(1 - oparg*2); stack_pointer[-oparg*2] = map; stack_pointer += 1 - oparg*2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1588,29 +1674,33 @@ } case _DICT_UPDATE: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _DICT_MERGE: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _MAP_ADD: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _LOAD_SUPER_ATTR_ATTR: { JitOptRef attr_st; attr_st = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = attr_st; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1619,10 +1709,11 @@ JitOptRef self_or_null; attr = sym_new_not_null(ctx); self_or_null = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-3] = attr; stack_pointer[-2] = self_or_null; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1638,8 +1729,9 @@ if (oparg & 1) { self_or_null[0] = sym_new_unknown(ctx); } + CHECK_STACK_BOUNDS((oparg&1)); stack_pointer += (oparg&1); - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1780,20 +1872,23 @@ } case _STORE_ATTR_INSTANCE_VALUE: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_ATTR_WITH_HINT: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _STORE_ATTR_SLOT: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1840,9 +1935,10 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } if (oparg & 16) { @@ -1851,9 +1947,10 @@ else { res = _Py_uop_sym_new_not_null(ctx); } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1891,15 +1988,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1941,15 +2040,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -1989,24 +2090,27 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } res = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _IS_OP: { JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = b; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2042,15 +2146,17 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = b; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } b = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = b; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2068,18 +2174,20 @@ case _CONTAINS_OP_SET: { JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = b; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CONTAINS_OP_DICT: { JitOptRef b; b = sym_new_type(ctx, &PyBool_Type); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = b; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2103,18 +2211,20 @@ case _IMPORT_NAME: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _IMPORT_FROM: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); 
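/*
 * [Editor's note] The argument to CHECK_STACK_BOUNDS is always the uop's net
 * stack effect, i.e. the same expression added to stack_pointer on the next
 * line. A worked example from the call cases further below: _CALL_BUILTIN_O
 * pops the callable, the self-or-NULL slot, and oparg arguments, then pushes
 * one result, so the delta is 1 - (2 + oparg) = -1 - oparg; with oparg == 1
 * that is -2, matching `stack_pointer += -1 - oparg`. The
 * `(oparg & 0xFF) + (oparg >> 8)` delta in the unpack hunk above likewise
 * sums the two item counts packed into the low and high bytes of oparg.
 */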
stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2149,51 +2259,57 @@ REPLACE_OP(this_instr, _LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)temp); } len = sym_new_const(ctx, temp); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = len; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); Py_DECREF(temp); stack_pointer += -1; } + CHECK_STACK_BOUNDS(1); stack_pointer[0] = len; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _MATCH_CLASS: { JitOptRef attrs; attrs = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = attrs; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _MATCH_MAPPING: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _MATCH_SEQUENCE: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _MATCH_KEYS: { JitOptRef values_or_none; values_or_none = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = values_or_none; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2210,10 +2326,11 @@ iter = sym_new_not_null(ctx); index_or_null = sym_new_unknown(ctx); } + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = iter; stack_pointer[0] = index_or_null; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2229,9 +2346,10 @@ case _FOR_ITER_TIER_TWO: { JitOptRef next; next = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2252,9 +2370,10 @@ case _ITER_NEXT_LIST_TIER_TWO: { JitOptRef next; next = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2277,9 +2396,10 @@ case _ITER_NEXT_TUPLE: { JitOptRef next; next = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2296,9 +2416,10 @@ case _ITER_NEXT_RANGE: { JitOptRef next; next = sym_new_type(ctx, &PyLong_Type); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = next; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2306,9 +2427,10 @@ JitOptRef gen_frame; gen_frame = PyJitRef_NULL; ctx->done = true; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = gen_frame; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2319,8 +2441,9 @@ method_and_self = &stack_pointer[-1]; method_and_self[0] = sym_new_null(ctx); method_and_self[1] = self; + CHECK_STACK_BOUNDS(1); stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2335,9 +2458,10 @@ case _WITH_EXCEPT_START: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); 
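/*
 * [Editor's note] None of these optimizer_cases.c.h hunks were edited by
 * hand; the file is emitted by Tools/cases_generator. The source of the
 * change appears near the end of this patch: optimizer_generator.py now
 * constructs the abstract interpreter's Stack with check_stack_bounds=True,
 * and stack.py's Stack.flush() gained a stack_bound_check() step that emits
 * CHECK_STACK_BOUNDS(<delta>) whenever the physical and logical stack
 * pointers differ, right before _save_physical_sp() writes the
 * `stack_pointer += ...; ASSERT_WITHIN_STACK_BOUNDS(...)` pair.
 */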
stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2346,10 +2470,11 @@ JitOptRef new_exc; prev_exc = sym_new_not_null(ctx); new_exc = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = prev_exc; stack_pointer[0] = new_exc; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2374,10 +2499,11 @@ _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2394,10 +2520,11 @@ _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2448,10 +2575,11 @@ _LOAD_CONST_UNDER_INLINE_BORROW, _LOAD_CONST_UNDER_INLINE); self = owner; + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = attr; stack_pointer[0] = self; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2483,9 +2611,10 @@ break; } new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2531,9 +2660,10 @@ case _CALL_NON_PY_GENERAL: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2616,17 +2746,19 @@ } else { new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); } + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = new_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _PUSH_FRAME: { JitOptRef new_frame; new_frame = stack_pointer[-1]; + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); if (!CURRENT_FRAME_IS_INIT_SHIM()) { ctx->frame->stack_pointer = stack_pointer; } @@ -2703,9 +2835,10 @@ else { res = sym_new_not_null(ctx); } + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2729,9 +2862,10 @@ else { res = sym_new_type(ctx, &PyUnicode_Type); } + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2755,9 +2889,10 @@ else { res = sym_new_type(ctx, &PyTuple_Type); } + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2797,51 +2932,57 @@ assert((this_instr + 1)->opcode == _PUSH_FRAME); PyCodeObject *co = get_code_with_logging((this_instr + 1)); init_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, args-1, oparg+1)); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = init_frame; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _EXIT_INIT_CHECK: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_BUILTIN_CLASS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_BUILTIN_O: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_BUILTIN_FAST: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_BUILTIN_FAST_WITH_KEYWORDS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2872,15 +3013,17 @@ 0, (uintptr_t)temp); } res = sym_new_const(ctx, temp); + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); Py_DECREF(temp); stack_pointer += 2; } + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2912,9 +3055,10 @@ sym_set_const(res, out); REPLACE_OP(this_instr, _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)out); } + CHECK_STACK_BOUNDS(-3); stack_pointer[-4] = res; stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2930,44 +3074,49 @@ } case _CALL_LIST_APPEND: { + CHECK_STACK_BOUNDS(-3); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_METHOD_DESCRIPTOR_O: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_METHOD_DESCRIPTOR_NOARGS: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _CALL_METHOD_DESCRIPTOR_FAST: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1 - oparg); stack_pointer[-2 - oparg] = res; stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -2988,9 +3137,10 @@ break; } new_frame = PyJitRef_Wrap((JitOptSymbol *)frame_new(ctx, co, 0, NULL, 0)); + CHECK_STACK_BOUNDS(-2 - oparg); stack_pointer[-3 - oparg] = new_frame; stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + 
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3013,9 +3163,10 @@ case _CALL_KW_NON_PY: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-2 - oparg); stack_pointer[-3 - oparg] = res; stack_pointer += -2 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3035,9 +3186,10 @@ case _SET_FUNCTION_ATTRIBUTE: { JitOptRef func_out; func_out = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = func_out; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3056,18 +3208,20 @@ } stack_pointer = ctx->frame->stack_pointer; res = sym_new_unknown(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _BUILD_SLICE: { JitOptRef slice; slice = sym_new_type(ctx, &PySlice_Type); + CHECK_STACK_BOUNDS(1 - oparg); stack_pointer[-oparg] = slice; stack_pointer += 1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3088,9 +3242,10 @@ case _FORMAT_WITH_SPEC: { JitOptRef res; res = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3100,9 +3255,10 @@ bottom = stack_pointer[-1 - (oparg-1)]; assert(oparg > 0); top = bottom; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = top; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3139,9 +3295,10 @@ REPLACE_OP(this_instr, _POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } bool lhs_int = sym_matches_type(lhs, &PyLong_Type); @@ -3177,9 +3334,10 @@ else { res = sym_new_type(ctx, &PyFloat_Type); } + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3224,8 +3382,9 @@ eliminate_pop_guard(this_instr, value != Py_True); } sym_set_const(flag, Py_True); + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3238,8 +3397,9 @@ eliminate_pop_guard(this_instr, value != Py_False); } sym_set_const(flag, Py_False); + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3256,8 +3416,9 @@ eliminate_pop_guard(this_instr, true); } sym_set_const(val, Py_None); + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3273,8 +3434,9 @@ assert(!sym_matches_type(val, &_PyNone_Type)); eliminate_pop_guard(this_instr, false); } + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3317,9 +3479,10 @@ JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; value = sym_new_const(ctx, ptr); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3335,27 +3498,31 @@ JitOptRef value; PyObject *ptr = (PyObject 
*)this_instr->operand0; value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + CHECK_STACK_BOUNDS(1); stack_pointer[0] = value; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_CALL: { + CHECK_STACK_BOUNDS(-2); stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_CALL_ONE: { + CHECK_STACK_BOUNDS(-3); stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } case _POP_CALL_TWO: { + CHECK_STACK_BOUNDS(-4); stack_pointer += -4; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3370,9 +3537,10 @@ case _POP_TWO_LOAD_CONST_INLINE_BORROW: { JitOptRef value; value = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3380,9 +3548,10 @@ JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + CHECK_STACK_BOUNDS(-1); stack_pointer[-2] = value; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3390,9 +3559,10 @@ JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + CHECK_STACK_BOUNDS(-2); stack_pointer[-3] = value; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3400,9 +3570,10 @@ JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + CHECK_STACK_BOUNDS(-3); stack_pointer[-4] = value; stack_pointer += -3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3411,10 +3582,11 @@ JitOptRef new; value = sym_new_not_null(ctx); new = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = value; stack_pointer[0] = new; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } @@ -3423,10 +3595,11 @@ JitOptRef new; value = sym_new_not_null(ctx); new = sym_new_not_null(ctx); + CHECK_STACK_BOUNDS(1); stack_pointer[-1] = value; stack_pointer[0] = new; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 67368b5ce07..2527dca71d7 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2571,7 +2571,7 @@ Py_EndInterpreter(PyThreadState *tstate) if (tstate != _PyThreadState_GET()) { Py_FatalError("thread is not current"); } - if (tstate->current_frame != NULL) { + if (tstate->current_frame != tstate->base_frame) { Py_FatalError("thread still has a frame"); } interp->finalizing = 1; diff --git a/Python/pystate.c b/Python/pystate.c index c12a1418e74..2956e785405 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1482,7 +1482,31 @@ init_threadstate(_PyThreadStateImpl *_tstate, // This is cleared when PyGILState_Ensure() creates the thread state. 
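/*
 * [Editor's note] The init_threadstate() hunk below replaces the old
 * `tstate->current_frame = NULL` with a sentinel: an embedded "base frame"
 * at the bottom of every thread's frame stack. An empty stack is now
 * `current_frame == tstate->base_frame` rather than `== NULL`, which is why
 * Py_EndInterpreter() above and PyThreadState_Clear() below compare against
 * tstate->base_frame. The core of the pattern, condensed from the new code:
 *
 *     _tstate->base_frame.previous = NULL;           // nothing beneath it
 *     _tstate->base_frame.owner = FRAME_OWNED_BY_INTERPRETER;
 *     tstate->current_frame = &_tstate->base_frame;  // stack is "empty"
 *     tstate->base_frame = &_tstate->base_frame;     // exposed to profilers
 *
 * Per the comments in the hunk, out-of-process profilers can then validate
 * stack unwinding by walking until they reach base_frame instead of trusting
 * a NULL `previous` pointer mid-walk.
 */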
tstate->gilstate_counter = 1; - tstate->current_frame = NULL; + // Initialize the embedded base frame - sentinel at the bottom of the frame stack + _tstate->base_frame.previous = NULL; + _tstate->base_frame.f_executable = PyStackRef_None; + _tstate->base_frame.f_funcobj = PyStackRef_NULL; + _tstate->base_frame.f_globals = NULL; + _tstate->base_frame.f_builtins = NULL; + _tstate->base_frame.f_locals = NULL; + _tstate->base_frame.frame_obj = NULL; + _tstate->base_frame.instr_ptr = NULL; + _tstate->base_frame.stackpointer = _tstate->base_frame.localsplus; + _tstate->base_frame.return_offset = 0; + _tstate->base_frame.owner = FRAME_OWNED_BY_INTERPRETER; + _tstate->base_frame.visited = 0; +#ifdef Py_DEBUG + _tstate->base_frame.lltrace = 0; +#endif +#ifdef Py_GIL_DISABLED + _tstate->base_frame.tlbc_index = 0; +#endif + _tstate->base_frame.localsplus[0] = PyStackRef_NULL; + + // current_frame starts pointing to the base frame + tstate->current_frame = &_tstate->base_frame; + // base_frame pointer for profilers to validate stack unwinding + tstate->base_frame = &_tstate->base_frame; tstate->datastack_chunk = NULL; tstate->datastack_top = NULL; tstate->datastack_limit = NULL; @@ -1660,7 +1684,7 @@ PyThreadState_Clear(PyThreadState *tstate) int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose; - if (verbose && tstate->current_frame != NULL) { + if (verbose && tstate->current_frame != tstate->base_frame) { /* bpo-20526: After the main thread calls _PyInterpreterState_SetFinalizing() in Py_FinalizeEx() (or in Py_EndInterpreter() for subinterpreters), diff --git a/Python/pystrhex.c b/Python/pystrhex.c index 38484f5a7d4..af2f5c5dce5 100644 --- a/Python/pystrhex.c +++ b/Python/pystrhex.c @@ -42,8 +42,7 @@ static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen, else { bytes_per_sep_group = 0; } - - unsigned int abs_bytes_per_sep = Py_ABS(bytes_per_sep_group); + unsigned int abs_bytes_per_sep = _Py_ABS_CAST(unsigned int, bytes_per_sep_group); Py_ssize_t resultlen = 0; if (bytes_per_sep_group && arglen > 0) { /* How many sep characters we'll be inserting. 
*/ diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 49ce0a97d47..272be504a68 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -1181,7 +1181,7 @@ _PyErr_Display(PyObject *file, PyObject *unused, PyObject *value, PyObject *tb) } if (print_exception_recursive(&ctx, value) < 0) { PyErr_Clear(); - _PyObject_Dump(value); + PyUnstable_Object_Dump(value); fprintf(stderr, "lost sys.stderr\n"); } Py_XDECREF(ctx.seen); @@ -1199,14 +1199,14 @@ PyErr_Display(PyObject *unused, PyObject *value, PyObject *tb) PyObject *file; if (PySys_GetOptionalAttr(&_Py_ID(stderr), &file) < 0) { PyObject *exc = PyErr_GetRaisedException(); - _PyObject_Dump(value); + PyUnstable_Object_Dump(value); fprintf(stderr, "lost sys.stderr\n"); - _PyObject_Dump(exc); + PyUnstable_Object_Dump(exc); Py_DECREF(exc); return; } if (file == NULL) { - _PyObject_Dump(value); + PyUnstable_Object_Dump(value); fprintf(stderr, "lost sys.stderr\n"); return; } diff --git a/Python/remote_debug.h b/Python/remote_debug.h index e7676013197..1c02870d3af 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -158,7 +158,7 @@ static mach_port_t pid_to_task(pid_t pid); #endif // Initialize the process handle -static int +UNUSED static int _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; #if defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX @@ -188,7 +188,7 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { } // Clean up the process handle -static void +UNUSED static void _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { #ifdef MS_WINDOWS if (handle->hProcess != NULL) { @@ -875,7 +875,7 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const #endif // MS_WINDOWS // Get the PyRuntime section address for any platform -static uintptr_t +UNUSED static uintptr_t _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) { uintptr_t address; @@ -1102,6 +1102,115 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #endif } +#if defined(__linux__) && HAVE_PROCESS_VM_READV +// Fallback write using /proc/pid/mem +static int +_Py_RemoteDebug_WriteRemoteMemoryFallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) +{ + if (handle->memfd == -1) { + if (open_proc_mem_fd(handle) < 0) { + return -1; + } + } + + struct iovec local[1]; + Py_ssize_t result = 0; + Py_ssize_t written = 0; + + do { + local[0].iov_base = (char*)src + result; + local[0].iov_len = len - result; + off_t offset = remote_address + result; + + written = pwritev(handle->memfd, local, 1, offset); + if (written < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += written; + } while ((size_t)written != local[0].iov_len); + return 0; +} +#endif // __linux__ + +// Platform-independent memory write function +UNUSED static int +_Py_RemoteDebug_WriteRemoteMemory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) +{ +#ifdef MS_WINDOWS + SIZE_T written = 0; + SIZE_T result = 0; + do { + if (!WriteProcessMemory(handle->hProcess, (LPVOID)(remote_address + result), (const char*)src + result, len - result, &written)) { + PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); + _set_debug_exception_cause(PyExc_OSError, + "WriteProcessMemory failed for PID %d at address 0x%lx " + "(size %zu, partial write %zu bytes): Windows error %lu", + handle->pid, remote_address + result, len - result, result, error); + return -1; + } + result += written; + } while (result < 
len); + return 0; +#elif defined(__linux__) && HAVE_PROCESS_VM_READV + if (handle->memfd != -1) { + return _Py_RemoteDebug_WriteRemoteMemoryFallback(handle, remote_address, len, src); + } + struct iovec local[1]; + struct iovec remote[1]; + Py_ssize_t result = 0; + Py_ssize_t written = 0; + + do { + local[0].iov_base = (void*)((char*)src + result); + local[0].iov_len = len - result; + remote[0].iov_base = (void*)((char*)remote_address + result); + remote[0].iov_len = len - result; + + written = process_vm_writev(handle->pid, local, 1, remote, 1, 0); + if (written < 0) { + if (errno == ENOSYS) { + return _Py_RemoteDebug_WriteRemoteMemoryFallback(handle, remote_address, len, src); + } + PyErr_SetFromErrno(PyExc_OSError); + _set_debug_exception_cause(PyExc_OSError, + "process_vm_writev failed for PID %d at address 0x%lx " + "(size %zu, partial write %zd bytes): %s", + handle->pid, remote_address + result, len - result, result, strerror(errno)); + return -1; + } + + result += written; + } while ((size_t)written != local[0].iov_len); + return 0; +#elif defined(__APPLE__) && defined(TARGET_OS_OSX) && TARGET_OS_OSX + kern_return_t kr = mach_vm_write( + handle->task, + (mach_vm_address_t)remote_address, + (vm_offset_t)src, + (mach_msg_type_number_t)len); + + if (kr != KERN_SUCCESS) { + switch (kr) { + case KERN_PROTECTION_FAILURE: + PyErr_SetString(PyExc_PermissionError, "Not enough permissions to write memory"); + break; + case KERN_INVALID_ARGUMENT: + PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_write"); + break; + default: + PyErr_Format(PyExc_RuntimeError, "Unknown error writing memory: %d", (int)kr); + } + return -1; + } + return 0; +#else + Py_UNREACHABLE(); +#endif +} + UNUSED static int _Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, uintptr_t addr, @@ -1158,7 +1267,7 @@ _Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); } -static int +UNUSED static int _Py_RemoteDebug_ReadDebugOffsets( proc_handle_t *handle, uintptr_t *runtime_start_address, diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 71ffb17ed68..5b50b95db94 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -24,104 +24,11 @@ read_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* d return _Py_RemoteDebug_ReadRemoteMemory(handle, remote_address, len, dst); } -// Why is pwritev not guarded? Except on Android API level 23 (no longer -// supported), HAVE_PROCESS_VM_READV is sufficient. 
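/*
 * [Editor's note] Everything removed below is remote_debugging.c's private
 * copy of write_memory(); the patch hoists the implementation into
 * remote_debug.h as _Py_RemoteDebug_WriteRemoteMemory (added above) and
 * leaves write_memory() as a one-line wrapper around it. The Linux path
 * keeps its two defining features: a partial-write loop, because
 * process_vm_writev() may write fewer bytes than requested, and a fallback
 * to pwritev() on /proc/<pid>/mem when the syscall is unavailable.
 * Condensed from the new code above, with error reporting elided:
 *
 *     do {
 *         written = process_vm_writev(handle->pid, local, 1, remote, 1, 0);
 *         if (written < 0) {
 *             if (errno == ENOSYS)   // kernel lacks the syscall
 *                 return _Py_RemoteDebug_WriteRemoteMemoryFallback(...);
 *             return -1;             // genuine error
 *         }
 *         result += written;         // advance past the partial write
 *     } while ((size_t)written != local[0].iov_len);
 *
 * The shared copy also adds _set_debug_exception_cause() context on failure,
 * which the old local copy did not have.
 */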
-#if defined(__linux__) && HAVE_PROCESS_VM_READV -static int -write_memory_fallback(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) -{ - if (handle->memfd == -1) { - if (open_proc_mem_fd(handle) < 0) { - return -1; - } - } - - struct iovec local[1]; - Py_ssize_t result = 0; - Py_ssize_t written = 0; - - do { - local[0].iov_base = (char*)src + result; - local[0].iov_len = len - result; - off_t offset = remote_address + result; - - written = pwritev(handle->memfd, local, 1, offset); - if (written < 0) { - PyErr_SetFromErrno(PyExc_OSError); - return -1; - } - - result += written; - } while ((size_t)written != local[0].iov_len); - return 0; -} -#endif // __linux__ - +// Use the shared write function from remote_debug.h static int write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { -#ifdef MS_WINDOWS - SIZE_T written = 0; - SIZE_T result = 0; - do { - if (!WriteProcessMemory(handle->hProcess, (LPVOID)(remote_address + result), (const char*)src + result, len - result, &written)) { - PyErr_SetFromWindowsErr(0); - return -1; - } - result += written; - } while (result < len); - return 0; -#elif defined(__linux__) && HAVE_PROCESS_VM_READV - if (handle->memfd != -1) { - return write_memory_fallback(handle, remote_address, len, src); - } - struct iovec local[1]; - struct iovec remote[1]; - Py_ssize_t result = 0; - Py_ssize_t written = 0; - - do { - local[0].iov_base = (void*)((char*)src + result); - local[0].iov_len = len - result; - remote[0].iov_base = (void*)((char*)remote_address + result); - remote[0].iov_len = len - result; - - written = process_vm_writev(handle->pid, local, 1, remote, 1, 0); - if (written < 0) { - if (errno == ENOSYS) { - return write_memory_fallback(handle, remote_address, len, src); - } - PyErr_SetFromErrno(PyExc_OSError); - return -1; - } - - result += written; - } while ((size_t)written != local[0].iov_len); - return 0; -#elif defined(__APPLE__) && TARGET_OS_OSX - kern_return_t kr = mach_vm_write( - pid_to_task(handle->pid), - (mach_vm_address_t)remote_address, - (vm_offset_t)src, - (mach_msg_type_number_t)len); - - if (kr != KERN_SUCCESS) { - switch (kr) { - case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to write memory"); - break; - case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_write"); - break; - default: - PyErr_Format(PyExc_RuntimeError, "Unknown error writing memory: %d", (int)kr); - } - return -1; - } - return 0; -#else - Py_UNREACHABLE(); -#endif + return _Py_RemoteDebug_WriteRemoteMemory(handle, remote_address, len, src); } static int diff --git a/Python/traceback.c b/Python/traceback.c index 521d6322a5c..8af63c22a9f 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -1036,7 +1036,7 @@ static int dump_frame(int fd, _PyInterpreterFrame *frame) { if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { - /* Ignore trampoline frame */ + /* Ignore trampoline frames and base frame sentinel */ return 0; } @@ -1094,6 +1094,9 @@ tstate_is_freed(PyThreadState *tstate) if (_PyMem_IsPtrFreed(tstate->interp)) { return 1; } + if (_PyMem_IsULongFreed(tstate->thread_id)) { + return 1; + } return 0; } @@ -1113,7 +1116,7 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) } if (tstate_is_freed(tstate)) { - PUTS(fd, " \n"); + PUTS(fd, " \n"); return; } @@ -1138,12 +1141,16 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) PUTS(fd, " \n"); break; } + // Read frame->previous early 
since memory can be freed during + // dump_frame() + _PyInterpreterFrame *previous = frame->previous; + if (dump_frame(fd, frame) < 0) { PUTS(fd, " \n"); break; } - frame = frame->previous; + frame = previous; if (frame == NULL) { break; } @@ -1240,7 +1247,9 @@ write_thread_id(int fd, PyThreadState *tstate, int is_current) tstate->thread_id, sizeof(unsigned long) * 2); - write_thread_name(fd, tstate); + if (!_PyMem_IsULongFreed(tstate->thread_id)) { + write_thread_name(fd, tstate); + } PUTS(fd, " (most recent call first):\n"); } @@ -1298,7 +1307,6 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, return "unable to get the thread head state"; /* Dump the traceback of each thread */ - tstate = PyInterpreterState_ThreadHead(interp); unsigned int nthreads = 0; _Py_BEGIN_SUPPRESS_IPH do @@ -1309,11 +1317,18 @@ _Py_DumpTracebackThreads(int fd, PyInterpreterState *interp, PUTS(fd, "...\n"); break; } + + if (tstate_is_freed(tstate)) { + PUTS(fd, "\n"); + break; + } + write_thread_id(fd, tstate, tstate == current_tstate); if (tstate == current_tstate && tstate->interp->gc.collecting) { PUTS(fd, " Garbage-collecting\n"); } dump_traceback(fd, tstate, 0); + tstate = PyThreadState_Next(tstate); nthreads++; } while (tstate != NULL); diff --git a/Python/tracemalloc.c b/Python/tracemalloc.c index 005bdd37828..20351618721 100644 --- a/Python/tracemalloc.c +++ b/Python/tracemalloc.c @@ -46,7 +46,7 @@ typedef struct tracemalloc_frame frame_t; typedef struct tracemalloc_traceback traceback_t; #define TRACEBACK_SIZE(NFRAME) \ - (sizeof(traceback_t) + sizeof(frame_t) * (NFRAME - 1)) + (sizeof(traceback_t) + sizeof(frame_t) * (NFRAME)) static const int MAX_NFRAME = UINT16_MAX; @@ -329,8 +329,9 @@ traceback_new(void) traceback->nframe = 0; traceback->total_nframe = 0; traceback_get_frames(traceback); - if (traceback->nframe == 0) - return &tracemalloc_empty_traceback; + if (traceback->nframe == 0) { + return tracemalloc_empty_traceback; + } traceback->hash = traceback_hash(traceback); /* intern the traceback */ @@ -754,12 +755,18 @@ _PyTraceMalloc_Init(void) return _PyStatus_NO_MEMORY(); } - tracemalloc_empty_traceback.nframe = 1; - tracemalloc_empty_traceback.total_nframe = 1; + assert(tracemalloc_empty_traceback == NULL); + tracemalloc_empty_traceback = raw_malloc(TRACEBACK_SIZE(1)); + if (tracemalloc_empty_traceback == NULL) { + return _PyStatus_NO_MEMORY(); + } + + tracemalloc_empty_traceback->nframe = 1; + tracemalloc_empty_traceback->total_nframe = 1; /* borrowed reference */ - tracemalloc_empty_traceback.frames[0].filename = &_Py_STR(anon_unknown); - tracemalloc_empty_traceback.frames[0].lineno = 0; - tracemalloc_empty_traceback.hash = traceback_hash(&tracemalloc_empty_traceback); + tracemalloc_empty_traceback->frames[0].filename = &_Py_STR(anon_unknown); + tracemalloc_empty_traceback->frames[0].lineno = 0; + tracemalloc_empty_traceback->hash = traceback_hash(tracemalloc_empty_traceback); tracemalloc_config.initialized = TRACEMALLOC_INITIALIZED; return _PyStatus_OK(); @@ -782,6 +789,9 @@ tracemalloc_deinit(void) _Py_hashtable_destroy(tracemalloc_filenames); PyThread_tss_delete(&tracemalloc_reentrant_key); + + raw_free(tracemalloc_empty_traceback); + tracemalloc_empty_traceback = NULL; } diff --git a/Tools/build/check_extension_modules.py b/Tools/build/check_extension_modules.py index 668db8df0bd..f23c1d5286f 100644 --- a/Tools/build/check_extension_modules.py +++ b/Tools/build/check_extension_modules.py @@ -23,9 +23,11 @@ import _imp import argparse import enum +import json import logging 
import os import pathlib +import pprint import re import sys import sysconfig @@ -116,6 +118,18 @@ help="Print a list of module names to stdout and exit", ) +parser.add_argument( + "--generate-missing-stdlib-info", + action="store_true", + help="Generate file with stdlib module info", +) + +parser.add_argument( + "--with-missing-stdlib-config", + metavar="CONFIG_FILE", + help="Path to JSON config file with custom missing module messages", +) + @enum.unique class ModuleState(enum.Enum): @@ -281,6 +295,39 @@ def list_module_names(self, *, all: bool = False) -> set[str]: names.update(WINDOWS_MODULES) return names + def generate_missing_stdlib_info(self, config_path: str | None = None) -> None: + config_messages = {} + if config_path: + try: + with open(config_path, encoding='utf-8') as f: + config_messages = json.load(f) + except (FileNotFoundError, json.JSONDecodeError) as e: + raise RuntimeError(f"Failed to load missing stdlib config {config_path!r}") from e + + messages = {} + for name in WINDOWS_MODULES: + messages[name] = f"Unsupported platform for Windows-only standard library module {name!r}" + + for modinfo in self.modules: + if modinfo.state in (ModuleState.DISABLED, ModuleState.DISABLED_SETUP): + messages[modinfo.name] = f"Standard library module disabled during build {modinfo.name!r} was not found" + elif modinfo.state == ModuleState.NA: + messages[modinfo.name] = f"Unsupported platform for standard library module {modinfo.name!r}" + + messages.update(config_messages) + + content = f'''\ +# Standard library information used by the traceback module for more informative +# ModuleNotFound error messages. +# Generated by check_extension_modules.py + +_MISSING_STDLIB_MODULE_MESSAGES = {pprint.pformat(messages)} +''' + + output_path = self.builddir / "_missing_stdlib_info.py" + with open(output_path, "w", encoding="utf-8") as f: + f.write(content) + def get_builddir(self) -> pathlib.Path: try: with open(self.pybuilddir_txt, encoding="utf-8") as f: @@ -499,6 +546,9 @@ def main() -> None: names = checker.list_module_names(all=True) for name in sorted(names): print(name) + elif args.generate_missing_stdlib_info: + checker.check() + checker.generate_missing_stdlib_info(args.with_missing_stdlib_config) else: checker.check() checker.summary(verbose=args.verbose) diff --git a/Tools/c-analyzer/c_parser/preprocessor/__init__.py b/Tools/c-analyzer/c_parser/preprocessor/__init__.py index 30a86cbd7dc..f8d2f805cb1 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/__init__.py +++ b/Tools/c-analyzer/c_parser/preprocessor/__init__.py @@ -16,6 +16,7 @@ from . import ( pure as _pure, gcc as _gcc, + clang as _clang, ) @@ -234,7 +235,7 @@ def handling_errors(ignore_exc=None, *, log_err=None): 'bcpp': None, # aliases/extras: 'gcc': _gcc.preprocess, - 'clang': None, + 'clang': _clang.preprocess, } diff --git a/Tools/c-analyzer/c_parser/preprocessor/clang.py b/Tools/c-analyzer/c_parser/preprocessor/clang.py new file mode 100644 index 00000000000..574a23f8f6d --- /dev/null +++ b/Tools/c-analyzer/c_parser/preprocessor/clang.py @@ -0,0 +1,104 @@ +import os.path +import re, sys + +from . import common as _common +from . 
import gcc as _gcc + +_normpath = _gcc._normpath + +TOOL = 'clang' + +META_FILES = { + '', + '', +} + + +def preprocess(filename, + incldirs=None, + includes=None, + macros=None, + samefiles=None, + cwd=None, + ): + if not cwd or not os.path.isabs(cwd): + cwd = os.path.abspath(cwd or '.') + filename = _normpath(filename, cwd) + + postargs = _gcc.POST_ARGS + basename = os.path.basename(filename) + dirname = os.path.basename(os.path.dirname(filename)) + if (basename not in _gcc.FILES_WITHOUT_INTERNAL_CAPI + and dirname not in _gcc.DIRS_WITHOUT_INTERNAL_CAPI): + postargs += ('-DPy_BUILD_CORE=1',) + + text = _common.preprocess( + TOOL, + filename, + incldirs=incldirs, + includes=includes, + macros=macros, + #preargs=PRE_ARGS, + postargs=postargs, + executable=['clang'], + compiler='unix', + cwd=cwd, + ) + return _iter_lines(text, filename, samefiles, cwd) + + +EXIT_MARKERS = {'# 2 "" 2', '# 3 "" 2', '# 4 "" 2'} + + +def _iter_lines(text, reqfile, samefiles, cwd, raw=False): + lines = iter(text.splitlines()) + + # The first line is special. + # The subsequent lines are consistent. + firstlines = [ + f'# 1 "{reqfile}"', + '# 1 "" 1', + '# 1 "" 3', + '# 370 "" 3', + '# 1 "" 1', + '# 1 "" 2', + ] + for expected in firstlines: + line = next(lines) + if line != expected: + raise NotImplementedError((line, expected)) + + # Do all the CLI-provided includes. + filter_reqfile = (lambda f: _gcc._filter_reqfile(f, reqfile, samefiles)) + make_info = (lambda lno: _common.FileInfo(reqfile, lno)) + last = None + for line in lines: + assert last != reqfile, (last,) + # NOTE:condition is clang specific + if not line: + continue + lno, included, flags = _gcc._parse_marker_line(line, reqfile) + if not included: + raise NotImplementedError((line,)) + if included == reqfile: + # This will be the last one. + assert 2 in flags, (line, flags) + else: + # NOTE:first condition is specific to clang + if _normpath(included, cwd) == reqfile: + assert 1 in flags or 2 in flags, (line, flags, included, reqfile) + else: + assert 1 in flags, (line, flags, included, reqfile) + yield from _gcc._iter_top_include_lines( + lines, + _normpath(included, cwd), + cwd, + filter_reqfile, + make_info, + raw, + EXIT_MARKERS + ) + last = included + # The last one is always the requested file. + # NOTE:_normpath is clang specific + assert _normpath(included, cwd) == reqfile, (line,) diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index d20cd19f6e6..4a55a1a24ee 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -65,6 +65,8 @@ '-E', ) +EXIT_MARKERS = {'# 0 "" 2', '# 1 "" 2'} + def preprocess(filename, incldirs=None, @@ -138,6 +140,7 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): filter_reqfile, make_info, raw, + EXIT_MARKERS ) last = included # The last one is always the requested file. @@ -146,7 +149,7 @@ def _iter_lines(text, reqfile, samefiles, cwd, raw=False): def _iter_top_include_lines(lines, topfile, cwd, filter_reqfile, make_info, - raw): + raw, exit_markers): partial = 0 # depth files = [topfile] # We start at 1 in case there are source lines (including blank ones) @@ -154,12 +157,20 @@ def _iter_top_include_lines(lines, topfile, cwd, # _parse_marker_line() that the preprocessor reported lno as 1. lno = 1 for line in lines: - if line == '# 0 "" 2' or line == '# 1 "" 2': + if line in exit_markers: # We're done with this top-level include. 
return
 
         _lno, included, flags = _parse_marker_line(line)
         if included:
+            # HACK: Treat curses.h marker lines as if they came from
+            # ncurses.h.  The mixed curses.h/ncurses.h markers pass through
+            # gcc's output silently, but make parsing of clang's output fail.
+            # See the #if/#elif directives in Include/py_curses.h and
+            # compare with the preprocessor output.
+            if os.path.basename(included) == 'curses.h':
+                included = os.path.join(os.path.dirname(included), 'ncurses.h')
+
             lno = _lno
             included = _normpath(included, cwd)
             # We hit a marker line.
diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py
index fd198d7d06c..6332dec3aef 100644
--- a/Tools/c-analyzer/cpython/_parser.py
+++ b/Tools/c-analyzer/cpython/_parser.py
@@ -114,7 +114,10 @@ def format_tsv_lines(lines):
     ('*', './Include/internal/mimalloc'),
 
     ('Modules/_decimal/**/*.c', 'Modules/_decimal/libmpdec'),
+    ('Modules/_elementtree.c', 'Modules/expat'),
+    ('Modules/pyexpat.c', 'Modules/expat'),
+
     ('Modules/_hacl/*.c', 'Modules/_hacl/include'),
     ('Modules/_hacl/*.c', 'Modules/_hacl/'),
     ('Modules/_hacl/*.h', 'Modules/_hacl/include'),
@@ -125,6 +128,7 @@ def format_tsv_lines(lines):
     ('Modules/sha3module.c', 'Modules/_hacl/include'),
     ('Modules/blake2module.c', 'Modules/_hacl/include'),
     ('Modules/hmacmodule.c', 'Modules/_hacl/include'),
+    ('Objects/stringlib/*.h', 'Objects'),
 
     # possible system-installed headers, just in case
 
@@ -340,6 +344,10 @@ def format_tsv_lines(lines):
 
     # Catch-alls:
     _abs('Include/**/*.h'): (5_000, 500),
+
+    # Specific to clang
+    _abs('Modules/selectmodule.c'): (40_000, 3000),
+    _abs('Modules/_testcapi/pyatomic.c'): (30_000, 1000),
 }
diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv
index 3c3cb2f9c86..301784f773d 100644
--- a/Tools/c-analyzer/cpython/globals-to-fix.tsv
+++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv
@@ -400,6 +400,7 @@ Modules/_tkinter.c - tcl_lock -
 Modules/_tkinter.c - excInCmd -
 Modules/_tkinter.c - valInCmd -
 Modules/_tkinter.c - trbInCmd -
+Modules/socketmodule.c - netdb_lock -
 
 
 ##################################
diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv
index bd4a8cf0d3e..adb183000de 100644
--- a/Tools/c-analyzer/cpython/ignored.tsv
+++ b/Tools/c-analyzer/cpython/ignored.tsv
@@ -16,6 +16,7 @@ filename funcname name reason
 ## indicators for resource availability/capability
 # (set during first init)
 Python/bootstrap_hash.c py_getrandom getrandom_works -
+Python/bootstrap_hash.c py_getentropy getentropy_works -
 Python/fileutils.c - _Py_open_cloexec_works -
 Python/fileutils.c set_inheritable ioctl_works -
 # (set lazily, *after* first init)
diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py
index 41df073cf6d..ab0a90e234b 100644
--- a/Tools/cases_generator/optimizer_generator.py
+++ b/Tools/cases_generator/optimizer_generator.py
@@ -445,7 +445,7 @@ def generate_abstract_interpreter(
             declare_variables(override, out, skip_inputs=False)
         else:
             declare_variables(uop, out, skip_inputs=True)
-        stack = Stack()
+        stack = Stack(check_stack_bounds=True)
         write_uop(override, uop, out, stack, debug, skip_inputs=(override is None))
         out.start_line()
         out.emit("break;\n")
diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py
index 3a0e7e5d0d5..53499558aed 100644
--- a/Tools/cases_generator/stack.py
+++ b/Tools/cases_generator/stack.py
@@ -216,11 +216,12 @@ def array_or_scalar(var: StackItem | Local) -> str:
     return "array" if var.is_array() else "scalar"
 
 class Stack:
-    def __init__(self) -> None:
+    def 
__init__(self, check_stack_bounds: bool = False) -> None:
         self.base_offset = PointerOffset.zero()
         self.physical_sp = PointerOffset.zero()
         self.logical_sp = PointerOffset.zero()
         self.variables: list[Local] = []
+        self.check_stack_bounds = check_stack_bounds
 
     def drop(self, var: StackItem, check_liveness: bool) -> None:
         self.logical_sp = self.logical_sp.pop(var)
@@ -296,7 +297,7 @@ def _save_physical_sp(self, out: CWriter) -> None:
             diff = self.logical_sp - self.physical_sp
             out.start_line()
             out.emit(f"stack_pointer += {diff.to_c()};\n")
-            out.emit(f"assert(WITHIN_STACK_BOUNDS());\n")
+            out.emit("ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);\n")
             self.physical_sp = self.logical_sp
             self._print(out)
 
@@ -316,8 +317,17 @@ def save_variables(self, out: CWriter) -> None:
                 self._print(out)
             var_offset = var_offset.push(var.item)
 
+    def stack_bound_check(self, out: CWriter) -> None:
+        if not self.check_stack_bounds:
+            return
+        if self.physical_sp != self.logical_sp:
+            diff = self.logical_sp - self.physical_sp
+            out.start_line()
+            out.emit(f"CHECK_STACK_BOUNDS({diff.to_c()});\n")
+
     def flush(self, out: CWriter) -> None:
         self._print(out)
+        self.stack_bound_check(out)
         self.save_variables(out)
         self._save_physical_sp(out)
         out.start_line()
@@ -347,6 +357,7 @@ def copy(self) -> "Stack":
         other.physical_sp = self.physical_sp
         other.logical_sp = self.logical_sp
         other.variables = [var.copy() for var in self.variables]
+        other.check_stack_bounds = self.check_stack_bounds
         return other
 
     def __eq__(self, other: object) -> bool:
diff --git a/Tools/check-c-api-docs/ignored_c_api.txt b/Tools/check-c-api-docs/ignored_c_api.txt
new file mode 100644
index 00000000000..e81ffd51e19
--- /dev/null
+++ b/Tools/check-c-api-docs/ignored_c_api.txt
@@ -0,0 +1,93 @@
+# pydtrace_probes.h
+PyDTrace_AUDIT
+PyDTrace_FUNCTION_ENTRY
+PyDTrace_FUNCTION_RETURN
+PyDTrace_GC_DONE
+PyDTrace_GC_START
+PyDTrace_IMPORT_FIND_LOAD_DONE
+PyDTrace_IMPORT_FIND_LOAD_START
+PyDTrace_INSTANCE_DELETE_DONE
+PyDTrace_INSTANCE_DELETE_START
+PyDTrace_INSTANCE_NEW_DONE
+PyDTrace_INSTANCE_NEW_START
+PyDTrace_LINE
+# fileobject.h
+Py_FileSystemDefaultEncodeErrors
+Py_FileSystemDefaultEncoding
+Py_HasFileSystemDefaultEncoding
+Py_UTF8Mode
+# pyhash.h
+Py_HASH_EXTERNAL
+# exports.h
+PyAPI_DATA
+Py_EXPORTED_SYMBOL
+Py_IMPORTED_SYMBOL
+Py_LOCAL_SYMBOL
+# modsupport.h
+PyABIInfo_FREETHREADING_AGNOSTIC
+# moduleobject.h
+PyModuleDef_Type
+# object.h
+Py_INVALID_SIZE
+Py_TPFLAGS_HAVE_VERSION_TAG
+Py_TPFLAGS_INLINE_VALUES
+Py_TPFLAGS_IS_ABSTRACT
+# pyexpat.h
+PyExpat_CAPI_MAGIC
+PyExpat_CAPSULE_NAME
+# pyport.h
+Py_ALIGNED
+Py_ARITHMETIC_RIGHT_SHIFT
+Py_CAN_START_THREADS
+Py_FORCE_EXPANSION
+Py_GCC_ATTRIBUTE
+Py_LL
+Py_SAFE_DOWNCAST
+Py_ULL
+Py_VA_COPY
+# unicodeobject.h
+Py_UNICODE_SIZE
+# cpython/methodobject.h
+PyCFunction_GET_CLASS
+# cpython/compile.h
+PyCF_ALLOW_INCOMPLETE_INPUT
+PyCF_COMPILE_MASK
+PyCF_DONT_IMPLY_DEDENT
+PyCF_IGNORE_COOKIE
+PyCF_MASK
+PyCF_MASK_OBSOLETE
+PyCF_SOURCE_IS_UTF8
+# cpython/descrobject.h
+PyDescr_COMMON
+PyDescr_NAME
+PyDescr_TYPE
+PyWrapperFlag_KEYWORDS
+# cpython/fileobject.h
+PyFile_NewStdPrinter
+PyStdPrinter_Type
+Py_UniversalNewlineFgets
+# cpython/setobject.h
+PySet_MINSIZE
+# cpython/ceval.h
+PyUnstable_CopyPerfMapFile
+PyUnstable_PerfTrampoline_CompileCode
+PyUnstable_PerfTrampoline_SetPersistAfterFork
+# cpython/genobject.h
+PyAsyncGenASend_CheckExact
+# cpython/longintrepr.h
+PyLong_BASE
+PyLong_MASK
+PyLong_SHIFT
+# cpython/pyerrors.h
+PyException_HEAD
+# cpython/pyframe.h
+PyUnstable_EXECUTABLE_KINDS 
+PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION
+PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR
+PyUnstable_EXECUTABLE_KIND_PY_FUNCTION
+PyUnstable_EXECUTABLE_KIND_SKIP
+# cpython/pylifecycle.h
+Py_FrozenMain
+# cpython/unicodeobject.h
+PyUnicode_IS_COMPACT
+PyUnicode_IS_COMPACT_ASCII
diff --git a/Tools/check-c-api-docs/main.py b/Tools/check-c-api-docs/main.py
new file mode 100644
index 00000000000..6bdf80a9ae8
--- /dev/null
+++ b/Tools/check-c-api-docs/main.py
@@ -0,0 +1,193 @@
+import re
+from pathlib import Path
+import sys
+import _colorize
+import textwrap
+
+SIMPLE_FUNCTION_REGEX = re.compile(r"PyAPI_FUNC(.+) (\w+)\(")
+SIMPLE_MACRO_REGEX = re.compile(r"# *define *(\w+)(\(.+\))? ")
+SIMPLE_INLINE_REGEX = re.compile(r"static inline .+( |\n)(\w+)")
+SIMPLE_DATA_REGEX = re.compile(r"PyAPI_DATA\(.+\) (\w+)")
+
+CPYTHON = Path(__file__).parent.parent.parent
+INCLUDE = CPYTHON / "Include"
+C_API_DOCS = CPYTHON / "Doc" / "c-api"
+IGNORED = (
+    (CPYTHON / "Tools" / "check-c-api-docs" / "ignored_c_api.txt")
+    .read_text()
+    .split("\n")
+)
+
+# Drop comment lines. (Popping from the list while iterating over it
+# would skip the entry following each removed line.)
+IGNORED = [line for line in IGNORED if not line.startswith("#")]
+
+MISTAKE = """
+If this is a mistake and this script should not be failing, create an
+issue and tag Peter (@ZeroIntensity) on it.\
+"""
+
+
+def found_undocumented(singular: bool) -> str:
+    some = "an" if singular else "some"
+    s = "" if singular else "s"
+    these = "this" if singular else "these"
+    them = "it" if singular else "them"
+    were = "was" if singular else "were"
+
+    return (
+        textwrap.dedent(
+            f"""
+            Found {some} undocumented C API{s}!
+
+            Python requires documentation on all public C API symbols, macros, and types.
+            If {these} API{s} {were} not meant to be public, prefix {them} with a
+            leading underscore (_PySomething_API) or move {them} to the internal C API
+            (pycore_*.h files).
+
+            In exceptional cases, certain APIs can be ignored by adding them to
+            Tools/check-c-api-docs/ignored_c_api.txt
+            """
+        )
+        + MISTAKE
+    )
+
+
+def found_ignored_documented(singular: bool) -> str:
+    some = "a" if singular else "some"
+    s = "" if singular else "s"
+    them = "it" if singular else "them"
+    were = "was" if singular else "were"
+    they = "it" if singular else "they"
+
+    return (
+        textwrap.dedent(
+            f"""
+            Found {some} C API{s} listed in Tools/check-c-api-docs/ignored_c_api.txt, but
+            {they} {were} found in the documentation. To fix this, remove {them} from
+            ignored_c_api.txt.
+            """
+        )
+        + MISTAKE
+    )
+
+
+def is_documented(name: str) -> bool:
+    """
+    Is a name present in the C API documentation?
+    """
+    for path in C_API_DOCS.iterdir():
+        if path.is_dir():
+            continue
+        if path.suffix != ".rst":
+            continue
+
+        text = path.read_text(encoding="utf-8")
+        if name in text:
+            return True
+
+    return False
+
+
+def scan_file_for_docs(filename: str, text: str) -> tuple[list[str], list[str]]:
+    """
+    Scan a header file for C API functions. 
+ """ + undocumented: list[str] = [] + documented_ignored: list[str] = [] + colors = _colorize.get_colors() + + def check_for_name(name: str) -> None: + documented = is_documented(name) + if documented and (name in IGNORED): + documented_ignored.append(name) + elif not documented and (name not in IGNORED): + undocumented.append(name) + + for function in SIMPLE_FUNCTION_REGEX.finditer(text): + name = function.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for macro in SIMPLE_MACRO_REGEX.finditer(text): + name = macro.group(1) + if not name.startswith("Py"): + continue + + if "(" in name: + name = name[: name.index("(")] + + check_for_name(name) + + for inline in SIMPLE_INLINE_REGEX.finditer(text): + name = inline.group(2) + if not name.startswith("Py"): + continue + + check_for_name(name) + + for data in SIMPLE_DATA_REGEX.finditer(text): + name = data.group(1) + if not name.startswith("Py"): + continue + + check_for_name(name) + + # Remove duplicates and sort alphabetically to keep the output deterministic + undocumented = list(set(undocumented)) + undocumented.sort() + + if undocumented or documented_ignored: + print(f"{filename} {colors.RED}BAD{colors.RESET}") + for name in undocumented: + print(f"{colors.BOLD_RED}UNDOCUMENTED:{colors.RESET} {name}") + for name in documented_ignored: + print(f"{colors.BOLD_YELLOW}DOCUMENTED BUT IGNORED:{colors.RESET} {name}") + else: + print(f"{filename} {colors.GREEN}OK{colors.RESET}") + + return undocumented, documented_ignored + + +def main() -> None: + print("Scanning for undocumented C API functions...") + files = [*INCLUDE.iterdir(), *(INCLUDE / "cpython").iterdir()] + all_missing: list[str] = [] + all_found_ignored: list[str] = [] + + for file in files: + if file.is_dir(): + continue + assert file.exists() + text = file.read_text(encoding="utf-8") + missing, ignored = scan_file_for_docs(str(file.relative_to(INCLUDE)), text) + all_found_ignored += ignored + all_missing += missing + + fail = False + to_check = [ + (all_missing, "missing", found_undocumented(len(all_missing) == 1)), + ( + all_found_ignored, + "documented but ignored", + found_ignored_documented(len(all_found_ignored) == 1), + ), + ] + for name_list, what, message in to_check: + if not name_list: + continue + + s = "s" if len(name_list) != 1 else "" + print(f"-- {len(name_list)} {what} C API{s} --") + for name in name_list: + print(f" - {name}") + print(message) + fail = True + + sys.exit(1 if fail else 0) + + +if __name__ == "__main__": + main() diff --git a/Tools/ftscalingbench/ftscalingbench.py b/Tools/ftscalingbench/ftscalingbench.py index 1a59e25189d..097a065f368 100644 --- a/Tools/ftscalingbench/ftscalingbench.py +++ b/Tools/ftscalingbench/ftscalingbench.py @@ -27,6 +27,7 @@ import sys import threading import time +from dataclasses import dataclass from operator import methodcaller # The iterations in individual benchmarks are scaled by this factor. 
@@ -202,6 +203,17 @@ def method_caller(): for i in range(1000 * WORK_SCALE): mc(obj) +@dataclass +class MyDataClass: + x: int + y: int + z: int + +@register_benchmark +def instantiate_dataclass(): + for _ in range(1000 * WORK_SCALE): + obj = MyDataClass(x=1, y=2, z=3) + def bench_one_thread(func): t0 = time.perf_counter_ns() func() diff --git a/Tools/inspection/benchmark_external_inspection.py b/Tools/inspection/benchmark_external_inspection.py index 0ac7ac4d385..9c40c2f4492 100644 --- a/Tools/inspection/benchmark_external_inspection.py +++ b/Tools/inspection/benchmark_external_inspection.py @@ -434,7 +434,7 @@ def main(): elif args.threads == "only_active": kwargs["only_active_thread"] = True unwinder = _remote_debugging.RemoteUnwinder( - process.pid, **kwargs + process.pid, cache_frames=True, **kwargs ) results = benchmark(unwinder, duration_seconds=args.duration) finally: diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py index c47e9af924a..4e86abe6049 100644 --- a/Tools/jit/_schema.py +++ b/Tools/jit/_schema.py @@ -89,6 +89,7 @@ class COFFSection(typing.TypedDict): Characteristics: dict[ typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]] ] + Name: dict[typing.Literal["Value"], str] Number: int RawDataSize: int Relocations: list[dict[typing.Literal["Relocation"], COFFRelocation]] diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index a76d8ff2792..4c188d74a68 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -267,6 +267,10 @@ class _COFF( def _handle_section( self, section: _schema.COFFSection, group: _stencils.StencilGroup ) -> None: + name = section["Name"]["Value"] + if name == ".debug$S": + # skip debug sections + return flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]} if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 857e926d119..0167f1b0ae5 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -86,6 +86,12 @@ do { \ #define TIER_TWO 2 +#ifdef Py_DEBUG +#define ASSERT_WITHIN_STACK_BOUNDS(F, L) _Py_assert_within_stack_bounds(frame, stack_pointer, (F), (L)) +#else +#define ASSERT_WITHIN_STACK_BOUNDS(F, L) (void)0 +#endif + __attribute__((preserve_none)) _Py_CODEUNIT * _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) { diff --git a/Tools/patchcheck/patchcheck.py b/Tools/patchcheck/patchcheck.py index afd010a5254..1a5797f7422 100755 --- a/Tools/patchcheck/patchcheck.py +++ b/Tools/patchcheck/patchcheck.py @@ -176,12 +176,6 @@ def docs_modified(file_paths): return bool(file_paths) -@status("Misc/ACKS updated", modal=True) -def credit_given(file_paths): - """Check if Misc/ACKS has been changed.""" - return os.path.join('Misc', 'ACKS') in file_paths - - @status("Misc/NEWS.d updated with `blurb`", modal=True) def reported_news(file_paths): """Check if Misc/NEWS.d has been changed.""" @@ -215,8 +209,6 @@ def main(): misc_files = {p for p in file_paths if p.startswith('Misc')} # Docs updated. docs_modified(has_doc_files) - # Misc/ACKS changed. - credit_given(misc_files) # Misc/NEWS changed. reported_news(misc_files) # Regenerated configure, if necessary. 
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
index ffa73a64f21..a4e111972bd 100644
--- a/Tools/peg_generator/pegen/c_generator.py
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -31,6 +31,7 @@
 EXTENSION_PREFIX = """\
 #include "pegen.h"
+#include "pycore_ceval.h"
 
 #if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
 #  define D(x) if (p->debug) { x; }
diff --git a/Tools/picklebench/README.md b/Tools/picklebench/README.md
new file mode 100644
index 00000000000..7d52485c386
--- /dev/null
+++ b/Tools/picklebench/README.md
@@ -0,0 +1,232 @@
+# Pickle Chunked Reading Benchmark
+
+This benchmark measures the performance impact of the chunked reading optimization in GH PR #119204 for the pickle module.
+
+## What This Tests
+
+The PR adds chunked reading (1MB chunks) to prevent memory exhaustion when unpickling large objects:
+- **BINBYTES8** - Large bytes objects (protocol 4+)
+- **BINUNICODE8** - Large strings (protocol 4+)
+- **BYTEARRAY8** - Large bytearrays (protocol 5)
+- **FRAME** - Large frames
+- **LONG4** - Large integers
+- An antagonistic mode that tests malicious, memory-denial-of-service-inducing pickles.
+
+## Quick Start
+
+```bash
+# Run full benchmark suite (1MiB → 200MiB, takes several minutes)
+build/python Tools/picklebench/memory_dos_impact.py
+
+# Test just a few sizes (quick test: 1, 10, 50 MiB)
+build/python Tools/picklebench/memory_dos_impact.py --sizes 1 10 50
+
+# Test smaller range for faster results
+build/python Tools/picklebench/memory_dos_impact.py --sizes 1 5 10
+
+# Output as markdown for reports
+build/python Tools/picklebench/memory_dos_impact.py --format markdown > results.md
+
+# Test with protocol 4 instead of 5
+build/python Tools/picklebench/memory_dos_impact.py --protocol 4
+```
+
+**Note:** Sizes are specified in MiB. Use `--sizes 1 2 5` for 1MiB, 2MiB, 5MiB objects.
+
+## Antagonistic Mode (DoS Protection Test)
+
+The `--antagonistic` flag tests **malicious pickles** that demonstrate the memory DoS protection:
+
+```bash
+# Quick DoS protection test (claims 10, 50, 100 MB but provides 1KB)
+build/python Tools/picklebench/memory_dos_impact.py --antagonistic --sizes 10 50 100
+
+# Full DoS test (default: 10, 50, 100, 500, 1000, 5000 MB claimed)
+build/python Tools/picklebench/memory_dos_impact.py --antagonistic
+```
+
+### What This Tests
+
+Unlike normal benchmarks that test **legitimate pickles**, antagonistic mode tests:
+- **Truncated BINBYTES8**: Claims 100MB but provides only 1KB (will fail to unpickle)
+- **Truncated BINUNICODE8**: Same for strings
+- **Truncated BYTEARRAY8**: Same for bytearrays
+- **Sparse memo attacks**: PUT at index 1 billion (would allocate huge array before PR)
+
+**Key difference:**
+- **Normal mode**: Tests real data, shows ~5% time overhead
+- **Antagonistic mode**: Tests malicious data, shows ~99% memory savings
+
+### Expected Results
+
+```
+100MB Claimed (actual: 1KB)
+  binbytes8_100MB_claim
+    Peak memory: 1.00 MB (claimed: 100 MB, saved: 99.00 MB, 99.0%)
+    Error: UnpicklingError ← Expected!
+
+Summary:
+  Average claimed: 126.2 MB
+  Average peak: 0.54 MB
+  Average saved: 125.7 MB (99.6% reduction)
+Protection Status: ✓ Memory DoS attacks mitigated by chunked reading
+```
+
+**Before PR**: Would allocate full claimed size (100MB+), potentially crash
+**After PR**: Allocates 1MB chunks, fails fast with minimal memory
+
+This demonstrates the **security improvement** - protection against memory exhaustion attacks. 
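+
+As a sanity check, the truncated-claim pickles above are easy to reproduce by
+hand. The sketch below mirrors what the benchmark's `truncated_binbytes8`
+generator emits: the BINBYTES8 opcode (`0x8e`) followed by an 8-byte
+little-endian length that claims 100 MiB while supplying only 1 KiB of data.
+The exact sizes and the printed peak are illustrative, not part of the tool:
+
+```python
+import pickle
+import struct
+import tracemalloc
+
+claimed = 100 * 2**20  # claim 100 MiB ...
+payload = b"\x8e" + struct.pack("<Q", claimed) + b"\x00" * 1024  # ... supply 1 KiB
+
+tracemalloc.start()
+try:
+    pickle.loads(payload)
+except Exception as exc:  # UnpicklingError (or EOFError) is the expected outcome
+    print(type(exc).__name__)
+_, peak = tracemalloc.get_traced_memory()
+tracemalloc.stop()
+print(f"peak: {peak / 2**20:.2f} MiB")  # ~1 MiB with chunked reading
+```
+
+Without chunked reading, the peak instead tracks the full claimed size.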
+ +## Before/After Comparison + +The benchmark includes an automatic comparison feature that runs the same tests on both a baseline and current Python build. + +### Option 1: Automatic Comparison (Recommended) + +Build both versions, then use `--baseline` to automatically compare: + +```bash +# Build the baseline (main branch without PR) +git checkout main +mkdir -p build-main +cd build-main && ../configure && make -j $(nproc) && cd .. + +# Build the current version (with PR) +git checkout unpickle-overallocate +mkdir -p build +cd build && ../configure && make -j $(nproc) && cd .. + +# Run automatic comparison (quick test with a few sizes) +build/python Tools/picklebench/memory_dos_impact.py \ + --baseline build-main/python \ + --sizes 1 10 50 + +# Full comparison (all default sizes) +build/python Tools/picklebench/memory_dos_impact.py \ + --baseline build-main/python +``` + +The comparison output shows: +- Side-by-side metrics (Current vs Baseline) +- Percentage change for time and memory +- Overall summary statistics + +### Interpreting Comparison Results + +- **Time change**: Small positive % is expected (chunking adds overhead, typically 5-10%) +- **Memory change**: Negative % is good (chunking saves memory, especially for large objects) +- **Trade-off**: Slightly slower but much safer against memory exhaustion attacks + +### Option 2: Manual Comparison + +Save results separately and compare manually: + +```bash +# Baseline results +build-main/python Tools/picklebench/memory_dos_impact.py --format json > baseline.json + +# Current results +build/python Tools/picklebench/memory_dos_impact.py --format json > current.json + +# Manual comparison +diff -y <(jq '.' baseline.json) <(jq '.' current.json) +``` + +## Understanding the Results + +### Critical Sizes + +The default test suite includes: +- **< 1MiB (999,000 bytes)**: No chunking, allocates full size upfront +- **= 1MiB (1,048,576 bytes)**: Threshold, chunking just starts +- **> 1MiB (1,048,577 bytes)**: Chunked reading engaged +- **1, 2, 5, 10MiB**: Show scaling behavior with chunking +- **20, 50, 100, 200MiB**: Stress test large object handling + +**Note:** The full suite may require more than 16GiB of RAM. 
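+
+To spot-check the threshold behavior described in the list above on your own
+build, a quick sketch (the chosen sizes and the expected peaks are
+illustrative):
+
+```python
+import pickle
+import tracemalloc
+
+# One size below the 1 MiB threshold, one just above it, one well above it.
+for size in (999_000, 1_048_577, 10 * 2**20):
+    payload = pickle.dumps(b"x" * size, protocol=5)
+    tracemalloc.start()
+    pickle.loads(payload)
+    _, peak = tracemalloc.get_traced_memory()
+    tracemalloc.stop()
+    print(f"{size:>10,} bytes -> peak {peak / 2**20:.2f} MiB")
+```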
+ +### Key Metrics + +- **Time (mean)**: Average unpickling time - should be similar before/after +- **Time (stdev)**: Consistency - lower is better +- **Peak Memory**: Maximum memory during unpickling - **expected to be LOWER after PR** +- **Pickle Size**: Size of the serialized data on disk + +### Test Types + +| Test | What It Stresses | +|------|------------------| +| `bytes_*` | BINBYTES8 opcode, raw binary data | +| `string_ascii_*` | BINUNICODE8 with simple ASCII | +| `string_utf8_*` | BINUNICODE8 with multibyte UTF-8 (€ chars) | +| `bytearray_*` | BYTEARRAY8 opcode (protocol 5) | +| `list_large_items_*` | Multiple chunked reads in sequence | +| `dict_large_values_*` | Chunking in dict deserialization | +| `nested_*` | Realistic mixed data structures | +| `tuple_*` | Immutable structures | + +## Expected Results + +### Before PR (main branch) +- Single large allocation per object +- Risk of memory exhaustion with malicious pickles + +### After PR (unpickle-overallocate branch) +- Chunked allocation (1MB at a time) +- **Slightly higher CPU time** (multiple allocations + resizing) +- **Significantly lower peak memory** (no large pre-allocation) +- Protection against DoS via memory exhaustion + +## Advanced Usage + +### Test Specific Sizes + +```bash +# Test only 5MiB and 10MiB objects +build/python Tools/picklebench/memory_dos_impact.py --sizes 5 10 + +# Test large objects: 50, 100, 200 MiB +build/python Tools/picklebench/memory_dos_impact.py --sizes 50 100 200 +``` + +### More Iterations for Stable Timing + +```bash +# Run 10 iterations per test for better statistics +build/python Tools/picklebench/memory_dos_impact.py --iterations 10 --sizes 1 10 +``` + +### JSON Output for Analysis + +```bash +# Generate JSON for programmatic analysis +build/python Tools/picklebench/memory_dos_impact.py --format json | python -m json.tool +``` + +## Interpreting Memory Results + +The **peak memory** metric shows the maximum memory allocated during unpickling: + +- **Without chunking**: Allocates full size immediately + - 10MB object → 10MB allocation upfront + +- **With chunking**: Allocates in 1MB chunks, grows geometrically + - 10MB object → starts with 1MB, grows: 2MB, 4MB, 8MB (final: ~10MB total) + - Peak is lower because allocation is incremental + +## Typical Results + +On a system with the PR applied, you should see: + +``` +1.00MiB Test Results + bytes_1.00MiB: ~0.3ms, 1.00MiB peak (just at threshold) + +2.00MiB Test Results + bytes_2.00MiB: ~0.8ms, 2.00MiB peak (chunked: 1MiB → 2MiB) + +10.00MiB Test Results + bytes_10.00MiB: ~3-5ms, 10.00MiB peak (chunked: 1→2→4→8→10 MiB) +``` + +Time overhead is minimal (~10-20% for very large objects), but memory safety is significantly improved. diff --git a/Tools/picklebench/memory_dos_impact.py b/Tools/picklebench/memory_dos_impact.py new file mode 100755 index 00000000000..3bad6586c46 --- /dev/null +++ b/Tools/picklebench/memory_dos_impact.py @@ -0,0 +1,1069 @@ +#!/usr/bin/env python3 +# +# Author: Claude Sonnet 4.5 as driven by gpshead +# +""" +Microbenchmark for pickle module chunked reading performance (GH PR #119204). + +This script generates Python data structures that act as antagonistic load +tests for the chunked reading code introduced to prevent memory exhaustion when +unpickling large objects. 
+
+The PR adds chunked reading (1MB chunks) for:
+- BINBYTES8 (large bytes)
+- BINUNICODE8 (large strings)
+- BYTEARRAY8 (large bytearrays)
+- FRAME (large frames)
+- LONG4 (large integers)
+
+It also includes an antagonistic mode that exercises memory-denial-of-service pickles.
+
+Usage:
+    python memory_dos_impact.py --help
+"""
+
+import argparse
+import gc
+import io
+import json
+import os
+import pickle
+import statistics
+import struct
+import subprocess
+import sys
+import tempfile
+import tracemalloc
+from pathlib import Path
+from time import perf_counter
+from typing import Any, Dict, List, Tuple, Optional
+
+
+# Configuration
+MIN_READ_BUF_SIZE = 1 << 20  # 1MB - matches pickle.py _MIN_READ_BUF_SIZE
+
+# Test sizes in MiB
+DEFAULT_SIZES_MIB = [1, 2, 5, 10, 20, 50, 100, 200]
+
+# Convert to bytes, plus threshold boundary tests
+DEFAULT_SIZES = (
+    [999_000]  # Below 1MiB (no chunking)
+    + [size * (1 << 20) for size in DEFAULT_SIZES_MIB]  # MiB to bytes
+    + [1_048_577]  # Just above 1MiB (minimal chunking overhead)
+)
+DEFAULT_SIZES.sort()
+
+# Baseline benchmark configuration
+BASELINE_BENCHMARK_TIMEOUT_SECONDS = 600  # 10 minutes
+
+# Sparse memo attack test configuration
+# Format: test_name -> (memo_index, baseline_memory_note)
+SPARSE_MEMO_TESTS = {
+    "sparse_memo_1M": (1_000_000, "~8 MB array"),
+    "sparse_memo_100M": (100_000_000, "~800 MB array"),
+    "sparse_memo_1B": (1_000_000_000, "~8 GB array"),
+}
+
+
+# Utility functions
+
+def _extract_size_mb(size_key: str) -> float:
+    """Extract the numeric MiB value from a size_key like '10.00MB' or '1.00MiB'.
+
+    Returns a large sentinel value for non-numeric keys so that they sort last.
+    """
+    try:
+        return float(size_key.replace('MB', '').replace('MiB', ''))
+    except ValueError:
+        return 999999.0  # Put non-numeric keys last
+
+
+def _format_output(results: Dict[str, Dict[str, Any]], format_type: str, is_antagonistic: bool) -> str:
+    """Format benchmark results according to requested format.
+
+    Args:
+        results: Benchmark results dictionary
+        format_type: Output format ('text', 'markdown', or 'json')
+        is_antagonistic: Whether these are antagonistic (DoS) test results
+
+    Returns:
+        Formatted output string
+    """
+    if format_type == 'json':
+        return Reporter.format_json(results)
+    elif is_antagonistic:
+        # Antagonistic mode uses specialized formatter for text/markdown
+        return Reporter.format_antagonistic(results)
+    elif format_type == 'text':
+        return Reporter.format_text(results)
+    elif format_type == 'markdown':
+        return Reporter.format_markdown(results)
+    else:
+        # Default to text format
+        return Reporter.format_text(results)
+
+
+class AntagonisticGenerator:
+    """Generate malicious/truncated pickles for DoS protection testing.
+
+    These pickles claim large sizes but provide minimal data, causing them to fail
+    during unpickling. They demonstrate the memory protection of chunked reading.
+    """
+
+    @staticmethod
+    def truncated_binbytes8(claimed_size: int, actual_size: int = 1024) -> bytes:
+        """BINBYTES8 claiming `claimed_size` but providing only `actual_size` bytes.
+
+        This will fail with UnpicklingError but demonstrates peak memory usage. 
+        Before PR: Allocates full claimed_size
+        After PR: Allocates in 1MB chunks, fails fast
+        """
+        return b'\x8e' + struct.pack('<Q', claimed_size) + b'\x00' * actual_size
+
+    @staticmethod
+    def truncated_binunicode8(claimed_size: int, actual_size: int = 1024) -> bytes:
+        """BINUNICODE8 claiming `claimed_size` but providing only `actual_size` bytes."""
+        return b'\x8d' + struct.pack('<Q', claimed_size) + b'x' * actual_size
+
+    @staticmethod
+    def truncated_bytearray8(claimed_size: int, actual_size: int = 1024) -> bytes:
+        """BYTEARRAY8 claiming `claimed_size` but providing only `actual_size` bytes."""
+        return b'\x96' + struct.pack('<Q', claimed_size) + b'\x00' * actual_size
+
+    @staticmethod
+    def truncated_frame(claimed_size: int) -> bytes:
+        """FRAME claiming `claimed_size` but providing minimal data."""
+        # FRAME length, followed by only NONE + STOP as the frame contents.
+        return b'\x95' + struct.pack('<Q', claimed_size) + b'N.'
+
+    @staticmethod
+    def sparse_memo_attack(index: int) -> bytes:
+        """LONG_BINPUT with huge sparse index.
+
+        Before PR: Tries to allocate array with `index` slots (OOM)
+        After PR: Uses dict-based memo for sparse indices
+        """
+        # MARK, EMPTY_LIST, LONG_BINPUT <index>, TUPLE, STOP
+        return (b'(]r' + struct.pack('<I', index) + b't.')
+
+    @staticmethod
+    def multi_claim_attack(count: int, claimed_size: int) -> bytes:
+        """Multiple BINBYTES8 claims in sequence.
+
+        Tests that multiple large claims don't accumulate memory.
+        """
+        data = b'('  # MARK
+        for _ in range(count):
+            data += b'\x8e' + struct.pack('<Q', claimed_size)
+        return data
+
+
+class DataGenerator:
+    """Generate legitimate test data structures of various types and sizes."""
+
+    @staticmethod
+    def large_bytes(size: int) -> bytes:
+        """Generate random bytes of specified size."""
+        return os.urandom(size)
+
+    @staticmethod
+    def large_string_ascii(size: int) -> str:
+        """Generate ASCII string of specified size."""
+        return 'x' * size
+
+    @staticmethod
+    def large_string_multibyte(size: int) -> str:
+        """Generate multibyte UTF-8 string (3 bytes per char for €)."""
+        # Each € is 3 bytes in UTF-8
+        return '€' * (size // 3)
+
+    @staticmethod
+    def large_bytearray(size: int) -> bytearray:
+        """Generate bytearray of specified size."""
+        return bytearray(os.urandom(size))
+
+    @staticmethod
+    def list_of_large_bytes(item_size: int, count: int) -> List[bytes]:
+        """Generate list containing multiple large bytes objects."""
+        return [os.urandom(item_size) for _ in range(count)]
+
+    @staticmethod
+    def dict_with_large_values(value_size: int, count: int) -> Dict[str, bytes]:
+        """Generate dict with large bytes values."""
+        return {
+            f'key_{i}': os.urandom(value_size)
+            for i in range(count)
+        }
+
+    @staticmethod
+    def nested_structure(size: int) -> Dict[str, Any]:
+        """Generate nested structure with various large objects."""
+        chunk_size = size // 4
+        return {
+            'name': 'test_object',
+            'data': {
+                'bytes': os.urandom(chunk_size),
+                'string': 's' * chunk_size,
+                'bytearray': bytearray(b'b' * chunk_size),
+            },
+            'items': [os.urandom(chunk_size // 4) for _ in range(4)],
+            'metadata': {
+                'size': size,
+                'type': 'nested',
+            },
+        }
+
+    @staticmethod
+    def tuple_of_large_objects(size: int) -> Tuple[bytes, str, bytearray]:
+        """Generate tuple with large objects (immutable, different pickle path)."""
+        chunk_size = size // 3
+        return (
+            os.urandom(chunk_size),
+            'x' * chunk_size,
+            bytearray(b'y' * chunk_size),
+        )
+
+
+class PickleBenchmark:
+    """Benchmark pickle unpickling performance and memory usage."""
+
+    def __init__(self, obj: Any, protocol: int = 5, iterations: int = 3):
+        self.obj = obj
+        self.protocol = protocol
+        self.iterations = iterations
+        self.pickle_data = pickle.dumps(obj, protocol=protocol)
+        self.pickle_size = len(self.pickle_data)
+
+    def benchmark_time(self) -> Dict[str, float]:
+        """Measure unpickling time over multiple iterations."""
+        times = []
+
+        for _ in range(self.iterations):
+            start = perf_counter()
+            result = pickle.loads(self.pickle_data)
+            elapsed = perf_counter() - start
+            times.append(elapsed)
+
+            # Verify correctness (first iteration only)
+            if len(times) == 1:
+                if result != self.obj:
+                    raise ValueError("Unpickled object doesn't match original!")
+
+        return {
+            'mean': statistics.mean(times),
+            'median': statistics.median(times),
+            'stdev': statistics.stdev(times) if len(times) > 1 else 0.0,
+            'min': min(times),
+            'max': 
max(times), + } + + def benchmark_memory(self) -> int: + """Measure peak memory usage during unpickling.""" + tracemalloc.start() + + # Warmup + pickle.loads(self.pickle_data) + + # Actual measurement + gc.collect() + tracemalloc.reset_peak() + result = pickle.loads(self.pickle_data) + current, peak = tracemalloc.get_traced_memory() + + tracemalloc.stop() + + # Verify correctness + if result != self.obj: + raise ValueError("Unpickled object doesn't match original!") + + return peak + + def run_all(self) -> Dict[str, Any]: + """Run all benchmarks and return comprehensive results.""" + time_stats = self.benchmark_time() + peak_memory = self.benchmark_memory() + + return { + 'pickle_size_bytes': self.pickle_size, + 'pickle_size_mb': self.pickle_size / (1 << 20), + 'protocol': self.protocol, + 'time': time_stats, + 'memory_peak_bytes': peak_memory, + 'memory_peak_mb': peak_memory / (1 << 20), + 'iterations': self.iterations, + } + + +class AntagonisticBenchmark: + """Benchmark antagonistic/malicious pickles that demonstrate DoS protection. + + These pickles are designed to FAIL unpickling, but we measure peak memory + usage before the failure to demonstrate the memory protection. + """ + + def __init__(self, pickle_data: bytes, name: str): + self.pickle_data = pickle_data + self.name = name + + def measure_peak_memory(self, expect_success: bool = False) -> Dict[str, Any]: + """Measure peak memory when attempting to unpickle antagonistic data. + + Args: + expect_success: If True, test expects successful unpickling (e.g., sparse memo). + If False, test expects failure (e.g., truncated data). + """ + tracemalloc.start() + gc.collect() + tracemalloc.reset_peak() + + error_type = None + error_msg = None + succeeded = False + + try: + result = pickle.loads(self.pickle_data) + succeeded = True + if expect_success: + error_type = "Success (expected)" + else: + error_type = "WARNING: Expected failure but succeeded" + except (pickle.UnpicklingError, EOFError, ValueError, OverflowError) as e: + if expect_success: + error_type = f"UNEXPECTED FAILURE: {type(e).__name__}" + error_msg = str(e)[:100] + else: + # Expected failure for truncated data tests + error_type = type(e).__name__ + error_msg = str(e)[:100] + + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + + return { + 'test_name': self.name, + 'peak_memory_bytes': peak, + 'peak_memory_mb': peak / (1 << 20), + 'error_type': error_type, + 'error_msg': error_msg, + 'pickle_size_bytes': len(self.pickle_data), + 'expected_outcome': 'success' if expect_success else 'failure', + 'succeeded': succeeded, + } + + +class AntagonisticTestSuite: + """Manage a suite of antagonistic (DoS protection) tests.""" + + # Default sizes in MB to claim (will provide only 1KB actual data) + DEFAULT_ANTAGONISTIC_SIZES_MB = [10, 50, 100, 500, 1000, 5000] + + def __init__(self, claimed_sizes_mb: List[int]): + self.claimed_sizes_mb = claimed_sizes_mb + + def _run_truncated_test( + self, + test_type: str, + generator_func, + claimed_bytes: int, + claimed_mb: int, + size_key: str, + all_results: Dict[str, Dict[str, Any]] + ) -> None: + """Run a single truncated data test and store results. 
+ + Args: + test_type: Type identifier (e.g., 'binbytes8', 'binunicode8') + generator_func: Function to generate malicious pickle data + claimed_bytes: Size claimed in the pickle (bytes) + claimed_mb: Size claimed in the pickle (MB) + size_key: Result key for this size (e.g., '10MB') + all_results: Dictionary to store results in + """ + test_name = f"{test_type}_{size_key}_claim" + data = generator_func(claimed_bytes) + bench = AntagonisticBenchmark(data, test_name) + result = bench.measure_peak_memory(expect_success=False) + result['claimed_mb'] = claimed_mb + all_results[size_key][test_name] = result + + def run_all_tests(self) -> Dict[str, Dict[str, Any]]: + """Run comprehensive antagonistic test suite.""" + all_results = {} + + for claimed_mb in self.claimed_sizes_mb: + claimed_bytes = claimed_mb << 20 + size_key = f"{claimed_mb}MB" + all_results[size_key] = {} + + # Run truncated data tests (expect failure) + self._run_truncated_test('binbytes8', AntagonisticGenerator.truncated_binbytes8, + claimed_bytes, claimed_mb, size_key, all_results) + self._run_truncated_test('binunicode8', AntagonisticGenerator.truncated_binunicode8, + claimed_bytes, claimed_mb, size_key, all_results) + self._run_truncated_test('bytearray8', AntagonisticGenerator.truncated_bytearray8, + claimed_bytes, claimed_mb, size_key, all_results) + self._run_truncated_test('frame', AntagonisticGenerator.truncated_frame, + claimed_bytes, claimed_mb, size_key, all_results) + + # Test 5: Sparse memo (expect success - dict-based memo works!) + all_results["Sparse Memo (Success Expected)"] = {} + for test_name, (index, baseline_note) in SPARSE_MEMO_TESTS.items(): + data = AntagonisticGenerator.sparse_memo_attack(index) + bench = AntagonisticBenchmark(data, test_name) + result = bench.measure_peak_memory(expect_success=True) + result['claimed_mb'] = "N/A" + result['baseline_note'] = f"Without PR: {baseline_note}" + all_results["Sparse Memo (Success Expected)"][test_name] = result + + # Test 6: Multi-claim attack (expect failure) + test_name = "multi_claim_10x100MB" + data = AntagonisticGenerator.multi_claim_attack(10, 100 << 20) + bench = AntagonisticBenchmark(data, test_name) + result = bench.measure_peak_memory(expect_success=False) + result['claimed_mb'] = 1000 # 10 * 100MB + all_results["Multi-Claim (Failure Expected)"] = {test_name: result} + + return all_results + + +class TestSuite: + """Manage a suite of benchmark tests.""" + + def __init__(self, sizes: List[int], protocol: int = 5, iterations: int = 3): + self.sizes = sizes + self.protocol = protocol + self.iterations = iterations + self.results = {} + + def run_test(self, name: str, obj: Any) -> Dict[str, Any]: + """Run benchmark for a single test object.""" + bench = PickleBenchmark(obj, self.protocol, self.iterations) + results = bench.run_all() + results['test_name'] = name + results['object_type'] = type(obj).__name__ + return results + + def run_all_tests(self) -> Dict[str, Dict[str, Any]]: + """Run comprehensive test suite across all sizes and types.""" + all_results = {} + + for size in self.sizes: + size_key = f"{size / (1 << 20):.2f}MB" + all_results[size_key] = {} + + # Test 1: Large bytes object (BINBYTES8) + test_name = f"bytes_{size_key}" + obj = DataGenerator.large_bytes(size) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 2: Large ASCII string (BINUNICODE8) + test_name = f"string_ascii_{size_key}" + obj = DataGenerator.large_string_ascii(size) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 
3: Large multibyte UTF-8 string + if size >= 3: + test_name = f"string_utf8_{size_key}" + obj = DataGenerator.large_string_multibyte(size) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 4: Large bytearray (BYTEARRAY8, protocol 5) + if self.protocol >= 5: + test_name = f"bytearray_{size_key}" + obj = DataGenerator.large_bytearray(size) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 5: List of large objects (repeated chunking) + if size >= MIN_READ_BUF_SIZE * 2: + test_name = f"list_large_items_{size_key}" + item_size = size // 5 + obj = DataGenerator.list_of_large_bytes(item_size, 5) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 6: Dict with large values + if size >= MIN_READ_BUF_SIZE * 2: + test_name = f"dict_large_values_{size_key}" + value_size = size // 3 + obj = DataGenerator.dict_with_large_values(value_size, 3) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 7: Nested structure + if size >= MIN_READ_BUF_SIZE: + test_name = f"nested_{size_key}" + obj = DataGenerator.nested_structure(size) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + # Test 8: Tuple (immutable) + if size >= 3: + test_name = f"tuple_{size_key}" + obj = DataGenerator.tuple_of_large_objects(size) + all_results[size_key][test_name] = self.run_test(test_name, obj) + + return all_results + + +class Comparator: + """Compare benchmark results between current and baseline interpreters.""" + + @staticmethod + def _extract_json_from_output(output: str) -> Dict[str, Dict[str, Any]]: + """Extract JSON data from subprocess output. + + Skips any print statements before the JSON output and parses the JSON. + + Args: + output: Raw stdout from subprocess + + Returns: + Parsed JSON as dictionary + + Raises: + SystemExit: If JSON cannot be found or parsed + """ + output_lines = output.strip().split('\n') + json_start = -1 + for i, line in enumerate(output_lines): + if line.strip().startswith('{'): + json_start = i + break + + if json_start == -1: + print("Error: Could not find JSON output from baseline", file=sys.stderr) + sys.exit(1) + + json_output = '\n'.join(output_lines[json_start:]) + try: + return json.loads(json_output) + except json.JSONDecodeError as e: + print(f"Error: Could not parse baseline JSON output: {e}", file=sys.stderr) + sys.exit(1) + + @staticmethod + def run_baseline_benchmark(baseline_python: str, args: argparse.Namespace) -> Dict[str, Dict[str, Any]]: + """Run the benchmark using the baseline Python interpreter.""" + # Build command to run this script with baseline Python + cmd = [ + baseline_python, + __file__, + '--format', 'json', + '--protocol', str(args.protocol), + '--iterations', str(args.iterations), + ] + + if args.sizes is not None: + cmd.extend(['--sizes'] + [str(s) for s in args.sizes]) + + if args.antagonistic: + cmd.append('--antagonistic') + + print(f"\nRunning baseline benchmark with: {baseline_python}") + print(f"Command: {' '.join(cmd)}\n") + + try: + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=BASELINE_BENCHMARK_TIMEOUT_SECONDS, + ) + + if result.returncode != 0: + print(f"Error running baseline benchmark:", file=sys.stderr) + print(result.stderr, file=sys.stderr) + sys.exit(1) + + # Extract and parse JSON from output + return Comparator._extract_json_from_output(result.stdout) + + except subprocess.TimeoutExpired: + print("Error: Baseline benchmark timed out", file=sys.stderr) + sys.exit(1) + + @staticmethod + def 
calculate_change(baseline_value: float, current_value: float) -> float: + """Calculate percentage change from baseline to current.""" + if baseline_value == 0: + return 0.0 + return ((current_value - baseline_value) / baseline_value) * 100 + + @staticmethod + def format_comparison( + current_results: Dict[str, Dict[str, Any]], + baseline_results: Dict[str, Dict[str, Any]] + ) -> str: + """Format comparison results as readable text.""" + lines = [] + lines.append("=" * 100) + lines.append("Pickle Unpickling Benchmark Comparison") + lines.append("=" * 100) + lines.append("") + lines.append("Legend: Current vs Baseline | % Change (+ is slower/more memory, - is faster/less memory)") + lines.append("") + + # Sort size keys numerically + for size_key in sorted(current_results.keys(), key=_extract_size_mb): + if size_key not in baseline_results: + continue + + lines.append(f"\n{size_key} Comparison") + lines.append("-" * 100) + + current_tests = current_results[size_key] + baseline_tests = baseline_results[size_key] + + for test_name in sorted(current_tests.keys()): + if test_name not in baseline_tests: + continue + + curr = current_tests[test_name] + base = baseline_tests[test_name] + + time_change = Comparator.calculate_change( + base['time']['mean'], curr['time']['mean'] + ) + mem_change = Comparator.calculate_change( + base['memory_peak_mb'], curr['memory_peak_mb'] + ) + + lines.append(f"\n {curr['test_name']}") + lines.append(f" Time: {curr['time']['mean']*1000:6.2f}ms vs {base['time']['mean']*1000:6.2f}ms | " + f"{time_change:+6.1f}%") + lines.append(f" Memory: {curr['memory_peak_mb']:6.2f}MB vs {base['memory_peak_mb']:6.2f}MB | " + f"{mem_change:+6.1f}%") + + lines.append("\n" + "=" * 100) + lines.append("\nSummary:") + + # Calculate overall statistics + time_changes = [] + mem_changes = [] + + for size_key in current_results.keys(): + if size_key not in baseline_results: + continue + for test_name in current_results[size_key].keys(): + if test_name not in baseline_results[size_key]: + continue + curr = current_results[size_key][test_name] + base = baseline_results[size_key][test_name] + + time_changes.append(Comparator.calculate_change( + base['time']['mean'], curr['time']['mean'] + )) + mem_changes.append(Comparator.calculate_change( + base['memory_peak_mb'], curr['memory_peak_mb'] + )) + + if time_changes: + lines.append(f" Time change: mean={statistics.mean(time_changes):+.1f}%, " + f"median={statistics.median(time_changes):+.1f}%") + if mem_changes: + lines.append(f" Memory change: mean={statistics.mean(mem_changes):+.1f}%, " + f"median={statistics.median(mem_changes):+.1f}%") + + lines.append("=" * 100) + return "\n".join(lines) + + @staticmethod + def format_antagonistic_comparison( + current_results: Dict[str, Dict[str, Any]], + baseline_results: Dict[str, Dict[str, Any]] + ) -> str: + """Format antagonistic benchmark comparison results.""" + lines = [] + lines.append("=" * 100) + lines.append("Antagonistic Pickle Benchmark Comparison (Memory DoS Protection)") + lines.append("=" * 100) + lines.append("") + lines.append("Legend: Current vs Baseline | Memory Change (- is better, shows memory saved)") + lines.append("") + lines.append("This compares TWO types of DoS protection:") + lines.append(" 1. Truncated data → Baseline allocates full claimed size, Current uses chunked reading") + lines.append(" 2. 
Sparse memo → Baseline uses huge arrays, Current uses dict-based memo") + lines.append("") + + # Track statistics + truncated_memory_changes = [] + sparse_memory_changes = [] + + # Sort size keys numerically + for size_key in sorted(current_results.keys(), key=_extract_size_mb): + if size_key not in baseline_results: + continue + + lines.append(f"\n{size_key} Comparison") + lines.append("-" * 100) + + current_tests = current_results[size_key] + baseline_tests = baseline_results[size_key] + + for test_name in sorted(current_tests.keys()): + if test_name not in baseline_tests: + continue + + curr = current_tests[test_name] + base = baseline_tests[test_name] + + curr_peak_mb = curr['peak_memory_mb'] + base_peak_mb = base['peak_memory_mb'] + expected_outcome = curr.get('expected_outcome', 'failure') + + mem_change = Comparator.calculate_change(base_peak_mb, curr_peak_mb) + mem_saved_mb = base_peak_mb - curr_peak_mb + + lines.append(f"\n {curr['test_name']}") + lines.append(f" Memory: {curr_peak_mb:6.2f}MB vs {base_peak_mb:6.2f}MB | " + f"{mem_change:+6.1f}% ({mem_saved_mb:+.2f}MB saved)") + + # Track based on test type + if expected_outcome == 'success': + sparse_memory_changes.append(mem_change) + if curr.get('baseline_note'): + lines.append(f" Note: {curr['baseline_note']}") + else: + truncated_memory_changes.append(mem_change) + claimed_mb = curr.get('claimed_mb', 'N/A') + if claimed_mb != 'N/A': + lines.append(f" Claimed: {claimed_mb:,}MB") + + # Show status + curr_status = curr.get('error_type', 'Unknown') + base_status = base.get('error_type', 'Unknown') + if curr_status != base_status: + lines.append(f" Status: {curr_status} (baseline: {base_status})") + else: + lines.append(f" Status: {curr_status}") + + lines.append("\n" + "=" * 100) + lines.append("\nSummary:") + lines.append("") + + if truncated_memory_changes: + lines.append(" Truncated Data Protection (chunked reading):") + lines.append(f" Mean memory change: {statistics.mean(truncated_memory_changes):+.1f}%") + lines.append(f" Median memory change: {statistics.median(truncated_memory_changes):+.1f}%") + avg_change = statistics.mean(truncated_memory_changes) + if avg_change < -50: + lines.append(f" Result: ✓ Dramatic memory reduction ({avg_change:.1f}%) - DoS protection working!") + elif avg_change < 0: + lines.append(f" Result: ✓ Memory reduced ({avg_change:.1f}%)") + else: + lines.append(f" Result: ⚠ Memory increased ({avg_change:.1f}%) - unexpected!") + lines.append("") + + if sparse_memory_changes: + lines.append(" Sparse Memo Protection (dict-based memo):") + lines.append(f" Mean memory change: {statistics.mean(sparse_memory_changes):+.1f}%") + lines.append(f" Median memory change: {statistics.median(sparse_memory_changes):+.1f}%") + avg_change = statistics.mean(sparse_memory_changes) + if avg_change < -50: + lines.append(f" Result: ✓ Dramatic memory reduction ({avg_change:.1f}%) - Dict optimization working!") + elif avg_change < 0: + lines.append(f" Result: ✓ Memory reduced ({avg_change:.1f}%)") + else: + lines.append(f" Result: ⚠ Memory increased ({avg_change:.1f}%) - unexpected!") + + lines.append("") + lines.append("=" * 100) + return "\n".join(lines) + + +class Reporter: + """Format and display benchmark results.""" + + @staticmethod + def format_text(results: Dict[str, Dict[str, Any]]) -> str: + """Format results as readable text.""" + lines = [] + lines.append("=" * 80) + lines.append("Pickle Unpickling Benchmark Results") + lines.append("=" * 80) + lines.append("") + + for size_key, tests in results.items(): + 
lines.append(f"\n{size_key} Test Results") + lines.append("-" * 80) + + for test_name, data in tests.items(): + lines.append(f"\n Test: {data['test_name']}") + lines.append(f" Type: {data['object_type']}") + lines.append(f" Pickle size: {data['pickle_size_mb']:.2f} MB") + lines.append(f" Time (mean): {data['time']['mean']*1000:.2f} ms") + lines.append(f" Time (stdev): {data['time']['stdev']*1000:.2f} ms") + lines.append(f" Peak memory: {data['memory_peak_mb']:.2f} MB") + lines.append(f" Protocol: {data['protocol']}") + + lines.append("\n" + "=" * 80) + return "\n".join(lines) + + @staticmethod + def format_markdown(results: Dict[str, Dict[str, Any]]) -> str: + """Format results as markdown table.""" + lines = [] + lines.append("# Pickle Unpickling Benchmark Results\n") + + for size_key, tests in results.items(): + lines.append(f"## {size_key}\n") + lines.append("| Test | Type | Pickle Size (MB) | Time (ms) | Stdev (ms) | Peak Memory (MB) |") + lines.append("|------|------|------------------|-----------|------------|------------------|") + + for test_name, data in tests.items(): + lines.append( + f"| {data['test_name']} | " + f"{data['object_type']} | " + f"{data['pickle_size_mb']:.2f} | " + f"{data['time']['mean']*1000:.2f} | " + f"{data['time']['stdev']*1000:.2f} | " + f"{data['memory_peak_mb']:.2f} |" + ) + lines.append("") + + return "\n".join(lines) + + @staticmethod + def format_json(results: Dict[str, Dict[str, Any]]) -> str: + """Format results as JSON.""" + import json + return json.dumps(results, indent=2) + + @staticmethod + def format_antagonistic(results: Dict[str, Dict[str, Any]]) -> str: + """Format antagonistic benchmark results.""" + lines = [] + lines.append("=" * 100) + lines.append("Antagonistic Pickle Benchmark (Memory DoS Protection Test)") + lines.append("=" * 100) + lines.append("") + lines.append("This benchmark tests TWO types of DoS protection:") + lines.append(" 1. Truncated data attacks → Expect FAILURE with minimal memory before failure") + lines.append(" 2. 
Sparse memo attacks → Expect SUCCESS with dict-based memo (vs huge array)") + lines.append("") + + # Sort size keys numerically + for size_key in sorted(results.keys(), key=_extract_size_mb): + tests = results[size_key] + + # Determine test type from first test + if tests: + first_test = next(iter(tests.values())) + expected_outcome = first_test.get('expected_outcome', 'failure') + claimed_mb = first_test.get('claimed_mb', 'N/A') + + # Header varies by test type + if "Sparse Memo" in size_key: + lines.append(f"\n{size_key}") + lines.append("-" * 100) + elif "Multi-Claim" in size_key: + lines.append(f"\n{size_key}") + lines.append("-" * 100) + elif claimed_mb != 'N/A': + lines.append(f"\n{size_key} Claimed (actual: 1KB) - Expect Failure") + lines.append("-" * 100) + else: + lines.append(f"\n{size_key}") + lines.append("-" * 100) + + for test_name, data in tests.items(): + peak_mb = data['peak_memory_mb'] + claimed = data.get('claimed_mb', 'N/A') + expected_outcome = data.get('expected_outcome', 'failure') + succeeded = data.get('succeeded', False) + baseline_note = data.get('baseline_note', '') + + lines.append(f" {data['test_name']}") + + # Format output based on test type + if expected_outcome == 'success': + # Sparse memo test - show success with dict + status_icon = "✓" if succeeded else "✗" + lines.append(f" Peak memory: {peak_mb:8.2f} MB {status_icon}") + lines.append(f" Status: {data['error_type']}") + if baseline_note: + lines.append(f" {baseline_note}") + else: + # Truncated data test - show savings before failure + if claimed != 'N/A': + saved_mb = claimed - peak_mb + savings_pct = (saved_mb / claimed * 100) if claimed > 0 else 0 + lines.append(f" Peak memory: {peak_mb:8.2f} MB (claimed: {claimed:,} MB, saved: {saved_mb:.2f} MB, {savings_pct:.1f}%)") + else: + lines.append(f" Peak memory: {peak_mb:8.2f} MB") + lines.append(f" Status: {data['error_type']}") + + lines.append("\n" + "=" * 100) + + # Calculate statistics by test type + truncated_claimed = 0 + truncated_peak = 0 + truncated_count = 0 + + sparse_peak_total = 0 + sparse_count = 0 + + for size_key, tests in results.items(): + for test_name, data in tests.items(): + expected_outcome = data.get('expected_outcome', 'failure') + + if expected_outcome == 'failure': + # Truncated data test + claimed = data.get('claimed_mb', 0) + if claimed != 'N/A' and claimed > 0: + truncated_claimed += claimed + truncated_peak += data['peak_memory_mb'] + truncated_count += 1 + else: + # Sparse memo test + sparse_peak_total += data['peak_memory_mb'] + sparse_count += 1 + + lines.append("\nSummary:") + lines.append("") + + if truncated_count > 0: + avg_claimed = truncated_claimed / truncated_count + avg_peak = truncated_peak / truncated_count + avg_saved = avg_claimed - avg_peak + avg_savings_pct = (avg_saved / avg_claimed * 100) if avg_claimed > 0 else 0 + + lines.append(" Truncated Data Protection (chunked reading):") + lines.append(f" Average claimed: {avg_claimed:,.1f} MB") + lines.append(f" Average peak: {avg_peak:,.2f} MB") + lines.append(f" Average saved: {avg_saved:,.2f} MB ({avg_savings_pct:.1f}% reduction)") + lines.append(f" Status: ✓ Fails fast with minimal memory") + lines.append("") + + if sparse_count > 0: + avg_sparse_peak = sparse_peak_total / sparse_count + lines.append(" Sparse Memo Protection (dict-based memo):") + lines.append(f" Average peak: {avg_sparse_peak:,.2f} MB") + lines.append(f" Status: ✓ Succeeds with dict (vs GB-sized arrays without PR)") + lines.append(f" Note: Compare with --baseline to see actual memory 
savings") + + lines.append("") + lines.append("=" * 100) + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser( + description="Benchmark pickle unpickling performance for large objects" + ) + parser.add_argument( + '--sizes', + type=int, + nargs='+', + default=None, + metavar='MiB', + help=f'Object sizes to test in MiB (default: {DEFAULT_SIZES_MIB})' + ) + parser.add_argument( + '--protocol', + type=int, + default=5, + choices=[0, 1, 2, 3, 4, 5], + help='Pickle protocol version (default: 5)' + ) + parser.add_argument( + '--iterations', + type=int, + default=3, + help='Number of benchmark iterations (default: 3)' + ) + parser.add_argument( + '--format', + choices=['text', 'markdown', 'json'], + default='text', + help='Output format (default: text)' + ) + parser.add_argument( + '--baseline', + type=str, + metavar='PYTHON', + help='Path to baseline Python interpreter for comparison (e.g., ../main-build/python)' + ) + parser.add_argument( + '--antagonistic', + action='store_true', + help='Run antagonistic/malicious pickle tests (DoS protection benchmark)' + ) + + args = parser.parse_args() + + # Handle antagonistic mode + if args.antagonistic: + # Antagonistic mode uses claimed sizes in MB, not actual data sizes + if args.sizes is None: + claimed_sizes_mb = AntagonisticTestSuite.DEFAULT_ANTAGONISTIC_SIZES_MB + else: + claimed_sizes_mb = args.sizes + + print(f"Running ANTAGONISTIC pickle benchmark (DoS protection test)...") + print(f"Claimed sizes: {claimed_sizes_mb} MiB (actual data: 1KB each)") + print(f"NOTE: These pickles will FAIL to unpickle (expected)") + print() + + # Run antagonistic benchmark suite + suite = AntagonisticTestSuite(claimed_sizes_mb) + results = suite.run_all_tests() + + # Format and display results + if args.baseline: + # Verify baseline Python exists + baseline_path = Path(args.baseline) + if not baseline_path.exists(): + print(f"Error: Baseline Python not found: {args.baseline}", file=sys.stderr) + return 1 + + # Run baseline benchmark + baseline_results = Comparator.run_baseline_benchmark(args.baseline, args) + + # Show comparison + comparison_output = Comparator.format_antagonistic_comparison(results, baseline_results) + print(comparison_output) + else: + # Format and display results + output = _format_output(results, args.format, is_antagonistic=True) + print(output) + + else: + # Normal mode: legitimate pickle benchmarks + # Convert sizes from MiB to bytes + if args.sizes is None: + sizes_bytes = DEFAULT_SIZES + else: + sizes_bytes = [size * (1 << 20) for size in args.sizes] + + print(f"Running pickle benchmark with protocol {args.protocol}...") + print(f"Test sizes: {[f'{s/(1<<20):.2f}MiB' for s in sizes_bytes]}") + print(f"Iterations per test: {args.iterations}") + print() + + # Run benchmark suite + suite = TestSuite(sizes_bytes, args.protocol, args.iterations) + results = suite.run_all_tests() + + # If baseline comparison requested, run baseline and compare + if args.baseline: + # Verify baseline Python exists + baseline_path = Path(args.baseline) + if not baseline_path.exists(): + print(f"Error: Baseline Python not found: {args.baseline}", file=sys.stderr) + return 1 + + # Run baseline benchmark + baseline_results = Comparator.run_baseline_benchmark(args.baseline, args) + + # Show comparison + comparison_output = Comparator.format_comparison(results, baseline_results) + print(comparison_output) + + else: + # Format and display results + output = _format_output(results, args.format, is_antagonistic=False) + print(output) + + return 0 + 
diff --git a/Tools/wasm/wasi/__main__.py b/Tools/wasm/wasi/__main__.py
index bb5ce30c5d8..3d9a2472d4d 100644
--- a/Tools/wasm/wasi/__main__.py
+++ b/Tools/wasm/wasi/__main__.py
@@ -5,6 +5,8 @@
 import functools
 import os
+import tomllib
+
 try:
     from os import process_cpu_count as cpu_count
 except ImportError:
@@ -16,7 +18,10 @@
 import sysconfig
 import tempfile
 
-CHECKOUT = pathlib.Path(__file__).parent.parent.parent.parent
+HERE = pathlib.Path(__file__).parent
+
+# Path is: cpython/Tools/wasm/wasi
+CHECKOUT = HERE.parent.parent.parent
 assert (CHECKOUT / "configure").is_file(), (
     "Please update the location of the file"
 )
@@ -31,7 +36,7 @@
     b"# Required to statically build extension modules."
 )
 
-WASI_SDK_VERSION = 25
+WASI_SDK_VERSION = 29
 
 WASMTIME_VAR_NAME = "WASMTIME"
 WASMTIME_HOST_RUNNER_VAR = f"{{{WASMTIME_VAR_NAME}}}"
@@ -211,9 +216,10 @@ def make_build_python(context, working_dir):
     log("🎉", f"{binary} {version}")
 
 
-def find_wasi_sdk():
+def find_wasi_sdk(config):
     """Find the path to the WASI SDK."""
     wasi_sdk_path = None
+    wasi_sdk_version = config["targets"]["wasi-sdk"]
 
     if wasi_sdk_path_env_var := os.environ.get("WASI_SDK_PATH"):
         wasi_sdk_path = pathlib.Path(wasi_sdk_path_env_var)
@@ -227,7 +233,7 @@
         # ``wasi-sdk-{WASI_SDK_VERSION}.0-x86_64-linux``.
         potential_sdks = [
             path
-            for path in opt_path.glob(f"wasi-sdk-{WASI_SDK_VERSION}.0*")
+            for path in opt_path.glob(f"wasi-sdk-{wasi_sdk_version}.0*")
             if path.is_dir()
         ]
         if len(potential_sdks) == 1:
@@ -243,12 +249,12 @@
         found_version = version_details.splitlines()[0]
         # Make sure there's a trailing dot to avoid false positives if somehow the
         # supported version is a prefix of the found version (e.g. `25` and `2567`).
-        if not found_version.startswith(f"{WASI_SDK_VERSION}."):
+        if not found_version.startswith(f"{wasi_sdk_version}."):
             major_version = found_version.partition(".")[0]
             log(
                 "⚠️",
                 f" Found WASI SDK {major_version}, "
-                f"but WASI SDK {WASI_SDK_VERSION} is the supported version",
+                f"but WASI SDK {wasi_sdk_version} is the supported version",
             )
 
     return wasi_sdk_path
@@ -269,7 +275,7 @@
     for env_var, binary_name in list(env.items()):
         env[env_var] = os.fsdecode(wasi_sdk_path / "bin" / binary_name)
 
-    if wasi_sdk_path != pathlib.Path("/opt/wasi-sdk"):
+    if not wasi_sdk_path.name.startswith("wasi-sdk"):
         for compiler in ["CC", "CPP", "CXX"]:
             env[compiler] += f" --sysroot={sysroot}"
 
@@ -304,9 +310,7 @@
             "specify via $WASI_SDK_PATH or --wasi-sdk"
         )
 
-    config_site = os.fsdecode(
-        CHECKOUT / "Tools" / "wasm" / "wasi" / "config.site-wasm32-wasi"
-    )
+    config_site = os.fsdecode(HERE / "config.site-wasm32-wasi")
 
     wasi_build_dir = working_dir.relative_to(CHECKOUT)
 
@@ -324,10 +328,7 @@
     # Use PYTHONPATH to include sysconfig data which must be anchored to the
     # WASI guest's `/` directory.
     args = {
-        "GUEST_DIR": "/",
-        "HOST_DIR": CHECKOUT,
-        "ENV_VAR_NAME": "PYTHONPATH",
-        "ENV_VAR_VALUE": f"/{sysconfig_data_dir}",
+        "PYTHONPATH": f"/{sysconfig_data_dir}",
         "PYTHON_WASM": working_dir / "python.wasm",
     }
     # Check dynamically for wasmtime in case it was specified manually via
@@ -389,18 +390,6 @@
     )
 
 
-def build_all(context):
-    """Build everything."""
-    steps = [
-        configure_build_python,
-        make_build_python,
-        configure_wasi_python,
-        make_wasi_python,
-    ]
-    for step in steps:
-        step(context)
-
-
 def clean_contents(context):
     """Delete all files created by this script."""
     if CROSS_BUILD_DIR.exists():
@@ -412,21 +401,35 @@
     log("🧹", f"Deleting generated {LOCAL_SETUP} ...")
 
 
+def build_steps(*steps):
+    """Construct a build command by composing other steps."""
+
+    def builder(context):
+        for step in steps:
+            step(context)
+
+    return builder
+
+
 def main():
-    default_host_triple = "wasm32-wasip1"
-    default_wasi_sdk = find_wasi_sdk()
+    with (HERE / "config.toml").open("rb") as file:
+        config = tomllib.load(file)
+    default_wasi_sdk = find_wasi_sdk(config)
+    default_host_triple = config["targets"]["host-triple"]
     default_host_runner = (
         f"{WASMTIME_HOST_RUNNER_VAR} run "
-        # Make sure the stack size will work for a pydebug
-        # build.
-        # Use 16 MiB stack.
-        "--wasm max-wasm-stack=16777216 "
-        # Enable thread support; causes use of preview1.
-        # "--wasm threads=y --wasi threads=y "
+        # For setting PYTHONPATH to the sysconfig data directory.
+        "--env PYTHONPATH={PYTHONPATH} "
         # Map the checkout to / to load the stdlib from /Lib.
-        "--dir {HOST_DIR}::{GUEST_DIR} "
-        # Set PYTHONPATH to the sysconfig data.
-        "--env {ENV_VAR_NAME}={ENV_VAR_VALUE}"
+        f"--dir {os.fsdecode(CHECKOUT)}::/ "
+        # Flags involving --optimize, --codegen, --debug, --wasm, and --wasi
+        # can be kept in a config file. We use such a file for three reasons:
+        # it provides defaults that a user can override selectively, it makes
+        # it easy to change settings post-build so they apply to the Makefile
+        # immediately instead of requiring it to be regenerated, and it lets
+        # anyone else who wants these settings simply copy the file.
+ f"--config {os.fsdecode(HERE / 'wasmtime.toml')}" ) default_logdir = pathlib.Path(tempfile.gettempdir()) @@ -439,6 +442,9 @@ def main(): make_build = subcommands.add_parser( "make-build-python", help="Run `make` for the build Python" ) + build_python = subcommands.add_parser( + "build-python", help="Build the build Python" + ) configure_host = subcommands.add_parser( "configure-host", help="Run `configure` for the " @@ -449,6 +455,9 @@ def main(): make_host = subcommands.add_parser( "make-host", help="Run `make` for the host/WASI" ) + build_host = subcommands.add_parser( + "build-host", help="Build the host/WASI Python" + ) subcommands.add_parser( "clean", help="Delete files and directories created by this script" ) @@ -456,8 +465,10 @@ def main(): build, configure_build, make_build, + build_python, configure_host, make_host, + build_host, ): subcommand.add_argument( "--quiet", @@ -472,7 +483,12 @@ def main(): default=default_logdir, help=f"Directory to store log files; defaults to {default_logdir}", ) - for subcommand in configure_build, configure_host: + for subcommand in ( + configure_build, + configure_host, + build_python, + build_host, + ): subcommand.add_argument( "--clean", action="store_true", @@ -480,11 +496,17 @@ def main(): dest="clean", help="Delete any relevant directories before building", ) - for subcommand in build, configure_build, configure_host: + for subcommand in ( + build, + configure_build, + configure_host, + build_python, + build_host, + ): subcommand.add_argument( "args", nargs="*", help="Extra arguments to pass to `configure`" ) - for subcommand in build, configure_host: + for subcommand in build, configure_host, build_host: subcommand.add_argument( "--wasi-sdk", type=pathlib.Path, @@ -500,7 +522,7 @@ def main(): help="Command template for running the WASI host; defaults to " f"`{default_host_runner}`", ) - for subcommand in build, configure_host, make_host: + for subcommand in build, configure_host, make_host, build_host: subcommand.add_argument( "--host-triple", action="store", @@ -512,12 +534,17 @@ def main(): context = parser.parse_args() context.init_dir = pathlib.Path().absolute() + build_build_python = build_steps(configure_build_python, make_build_python) + build_wasi_python = build_steps(configure_wasi_python, make_wasi_python) + dispatch = { "configure-build-python": configure_build_python, "make-build-python": make_build_python, + "build-python": build_build_python, "configure-host": configure_wasi_python, "make-host": make_wasi_python, - "build": build_all, + "build-host": build_wasi_python, + "build": build_steps(build_build_python, build_wasi_python), "clean": clean_contents, } dispatch[context.subcommand](context) diff --git a/Tools/wasm/wasi/config.toml b/Tools/wasm/wasi/config.toml new file mode 100644 index 00000000000..7ca2f76f56d --- /dev/null +++ b/Tools/wasm/wasi/config.toml @@ -0,0 +1,6 @@ +# Any data that can vary between Python versions is to be kept in this file. +# This allows for blanket copying of the WASI build code between supported +# Python versions. +[targets] +wasi-sdk = 29 +host-triple = "wasm32-wasip1" diff --git a/Tools/wasm/wasi/wasmtime.toml b/Tools/wasm/wasi/wasmtime.toml new file mode 100644 index 00000000000..5a45e8c3db9 --- /dev/null +++ b/Tools/wasm/wasi/wasmtime.toml @@ -0,0 +1,5 @@ +# https://docs.wasmtime.dev/cli-options.html#cli-options-using-toml-file + +[wasm] +# 32 MiB; big enough for the test suite to pass under a debug build. 
diff --git a/configure b/configure
index a4514f80c3a..7561fb9c7ad 100755
--- a/configure
+++ b/configure
@@ -1012,6 +1012,7 @@ UNIVERSALSDK
 host_exec_prefix
 host_prefix
 MACHDEP
+MISSING_STDLIB_CONFIG
 PKG_CONFIG_LIBDIR
 PKG_CONFIG_PATH
 PKG_CONFIG
@@ -1083,6 +1084,7 @@ ac_user_opts='
 enable_option_checking
 with_build_python
 with_pkg_config
+with_missing_stdlib_config
 enable_universalsdk
 with_universal_archs
 with_framework_name
@@ -1822,7 +1824,8 @@ Optional Features:
                           no)
   --enable-wasm-dynamic-linking
                           Enable dynamic linking support for WebAssembly
-                          (default is no)
+                          (default is no); WASI requires an external dynamic
+                          loader to handle imports
   --enable-wasm-pthreads  Enable pthread emulation for WebAssembly (default
                           is no)
   --enable-shared         enable building a shared Python library (default is
@@ -1862,6 +1865,9 @@ Optional Packages:
   --with-pkg-config=[yes|no|check]
                           use pkg-config to detect build options (default is
                           check)
+  --with-missing-stdlib-config=FILE
+                          File with custom module error messages for missing
+                          stdlib modules
   --with-universal-archs=ARCH
                           specify the kind of macOS universal binary that
                           should be created. This option is only valid when
@@ -4095,6 +4101,19 @@ if test "$with_pkg_config" = yes -a -z "$PKG_CONFIG"; then
   as_fn_error $? "pkg-config is required" "$LINENO" 5]
 fi
 
+
+# Check whether --with-missing-stdlib-config was given.
+if test ${with_missing_stdlib_config+y}
+then :
+  withval=$with_missing_stdlib_config; MISSING_STDLIB_CONFIG="$withval"
+else case e in #(
+  e) MISSING_STDLIB_CONFIG=""
+  ;;
+esac
+fi
+
+
+
 # Set name for machine-dependent library files
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MACHDEP" >&5
@@ -7397,7 +7416,7 @@
 then :
   Emscripten) : ;; #(
   WASI) :
-    as_fn_error $? "WASI dynamic linking is not implemented yet." "$LINENO" 5 ;; #(
+     ;; #(
   *) :
     as_fn_error $? "--enable-wasm-dynamic-linking only applies to Emscripten and WASI" "$LINENO" 5 ;;
@@ -7790,10 +7809,6 @@ fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LDLIBRARY" >&5
 printf "%s\n" "$LDLIBRARY" >&6; }
 
-if test "$cross_compiling" = yes; then
-  RUNSHARED=
-fi
-
 # HOSTRUNNER - Program to run CPython for the host platform
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking HOSTRUNNER" >&5
 printf %s "checking HOSTRUNNER... " >&6; }
" >&6; } @@ -29987,6 +30002,7 @@ SRCDIRS="\ Modules/_hacl \ Modules/_io \ Modules/_multiprocessing \ + Modules/_remote_debugging \ Modules/_sqlite \ Modules/_sre \ Modules/_testcapi \ diff --git a/configure.ac b/configure.ac index a059a07bec2..fa24bc78a26 100644 --- a/configure.ac +++ b/configure.ac @@ -307,6 +307,15 @@ if test "$with_pkg_config" = yes -a -z "$PKG_CONFIG"; then AC_MSG_ERROR([pkg-config is required])] fi +dnl Allow distributors to provide custom missing stdlib module error messages +AC_ARG_WITH([missing-stdlib-config], + [AS_HELP_STRING([--with-missing-stdlib-config=FILE], + [File with custom module error messages for missing stdlib modules])], + [MISSING_STDLIB_CONFIG="$withval"], + [MISSING_STDLIB_CONFIG=""] +) +AC_SUBST([MISSING_STDLIB_CONFIG]) + # Set name for machine-dependent library files AC_ARG_VAR([MACHDEP], [name for machine-dependent library files]) AC_MSG_CHECKING([MACHDEP]) @@ -1314,11 +1323,11 @@ dnl See https://emscripten.org/docs/compiling/Dynamic-Linking.html AC_MSG_CHECKING([for --enable-wasm-dynamic-linking]) AC_ARG_ENABLE([wasm-dynamic-linking], [AS_HELP_STRING([--enable-wasm-dynamic-linking], - [Enable dynamic linking support for WebAssembly (default is no)])], + [Enable dynamic linking support for WebAssembly (default is no); WASI requires an external dynamic loader to handle imports])], [ AS_CASE([$ac_sys_system], [Emscripten], [], - [WASI], [AC_MSG_ERROR([WASI dynamic linking is not implemented yet.])], + [WASI], [], [AC_MSG_ERROR([--enable-wasm-dynamic-linking only applies to Emscripten and WASI])] ) ], [ @@ -1630,10 +1639,6 @@ else # shared is disabled fi AC_MSG_RESULT([$LDLIBRARY]) -if test "$cross_compiling" = yes; then - RUNSHARED= -fi - # HOSTRUNNER - Program to run CPython for the host platform AC_MSG_CHECKING([HOSTRUNNER]) if test -z "$HOSTRUNNER" @@ -7194,6 +7199,7 @@ SRCDIRS="\ Modules/_hacl \ Modules/_io \ Modules/_multiprocessing \ + Modules/_remote_debugging \ Modules/_sqlite \ Modules/_sre \ Modules/_testcapi \